Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -1008,6 +1008,12 @@
     return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
   }
 
+  /// Return true if this is a setcc instruction that performs a greater-than
+  /// or greater-or-equal comparison on integer operands (signed or unsigned).
+  inline bool isGreaterThanIntSetCC(CondCode Code) {
+    return Code == SETGT || Code == SETGE || Code == SETUGT || Code == SETUGE;
+  }
+
   /// Return true if the specified condition returns true if the two operands to
   /// the condition are equal. Note that if one of the two operands is a NaN,
   /// this value is meaningless.
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -724,6 +724,16 @@
   /// time proportional to the number of uses.
   size_t use_size() const { return std::distance(use_begin(), use_end()); }
 
+  /// Return the number of uses of this node, capped at MaxDistance. At most
+  /// MaxDistance uses are visited, so the cost is bounded by MaxDistance.
+  size_t use_size(size_t MaxDistance) const {
+    size_t Dist = 0;
+    for (auto I = use_begin(), E = use_end(); I != E && Dist != MaxDistance;
+         ++I)
+      ++Dist;
+    return Dist;
+  }
+
   /// Return the unique node id.
   int getNodeId() const { return NodeId; }
 
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -151,6 +151,13 @@
                            // mask (ex: x86 blends).
}; + /// Enum that describes the behavior of SHIFT instructions + enum ShiftBehavior { + ShiftStrict, // What LLVM IR defines, range 0 to bitwidth - 1 + ShiftMasked, + ShiftOutOfRangeToZeroOrSignBit, + }; + /// Enum that specifies what an atomic load/AtomicRMWInst is expanded /// to, if at all. Exists because different targets have different levels of /// support for these atomic instructions, and also have different options @@ -665,6 +672,10 @@ return true; } + /// Return true if SHIFT instructions return 0 when the operand is equal or + /// greater than the bit width. + virtual enum ShiftBehavior getShiftBehavior() const { return ShiftStrict; } + /// Return true if the target has native support for the specified value type. /// This means that it has a register that directly holds it without /// promotions or expansions. Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -464,6 +464,7 @@ SDValue N1, SDNodeFlags Flags); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + bool UnguardShift(SDNode *N, SDValue *Out); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); @@ -7654,6 +7655,81 @@ return SDValue(); } +// If this select is guarding a shift from undefined behavior, +// and on this platform that shift is well-defined, remove the select. +bool DAGCombiner::UnguardShift(SDNode *N, SDValue *Out) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (DAG.getTargetLoweringInfo().getShiftBehavior() != + TargetLoweringBase::ShiftOutOfRangeToZeroOrSignBit) + return false; + + // Are we selecting from a shift? 
+ SDValue *Shift; + if (N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) { + Shift = &N1; + if (!(N2.getOpcode() == ISD::Constant && N->getConstantOperandVal(2) == 0)) + return false; + } else if (N2.getOpcode() == ISD::SHL || N2.getOpcode() == ISD::SRL) { + Shift = &N2; + if (!(N1.getOpcode() == ISD::Constant && N->getConstantOperandVal(1) == 0)) + return false; + } else if (N1.getOpcode() == ISD::SRA) { + return false; // FIXME + } else if (N2.getOpcode() == ISD::SRA) { + return false; // FIXME + } else + return false; + + // Is N0 a check? + enum ISD::NodeType O = (enum ISD::NodeType)N0->getOpcode(); + if (O != ISD::SETCC) + return false; + CondCodeSDNode *Cond = dyn_cast(N0->getOperand(2)); + if (!Cond) + return false; + enum ISD::CondCode CC = Cond->get(); + SDValue Walk = N0->getOperand(1); + if (Walk->getOpcode() == ISD::ZERO_EXTEND) + Walk = Walk->getOperand(0); + if (Walk->getOpcode() != ISD::Constant) + return false; + uint64_t Const = N0->getConstantOperandVal(1); + + // Does the check guard against UB? + bool Succeeded = false; + if (ISD::isGreaterThanIntSetCC(CC)) { + if (!ISD::isTrueWhenEqual(CC)) + Const++; + if (VT.getScalarSizeInBits() == Const) { + Walk = Shift->getOperand(1); + if (Walk->getOpcode() == ISD::ZERO_EXTEND) + Walk = Walk->getOperand(0); + if (N0->getOperand(0) == Walk) + Succeeded = true; + } + } else if (CC == ISD::SETEQ && Const == 0) { + Walk = Shift->getOperand(1); + if (Walk->getOpcode() == ISD::ZERO_EXTEND) + Walk = Walk->getOperand(0); + if (Walk->getOpcode() != ISD::SUB) + return false; + // Why is the constant on the left here, when in LLVM-IR it is always on the + // right? 
+ if (Walk->getOperand(0)->getOpcode() == ISD::Constant && + Walk->getConstantOperandVal(0) == VT.getScalarSizeInBits()) + if (N0->getOperand(0) == Walk->getOperand(1)) + Succeeded = true; + } + + if (!Succeeded) + return false; + *Out = *Shift; + return true; +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7673,6 +7749,10 @@ if (SDValue V = foldSelectOfConstants(N)) return V; + SDValue Shift; + if (UnguardShift(N, &Shift)) + return Shift; + // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -479,6 +479,10 @@ return true; } + enum ShiftBehavior getShiftBehavior() const override { + return ShiftOutOfRangeToZeroOrSignBit; + } + bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { // For vectors, we don't have a preference.. 
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -573,6 +573,10 @@
 
     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 
+    enum ShiftBehavior getShiftBehavior() const override {
+      return ShiftOutOfRangeToZeroOrSignBit; // NOTE(review): register shifts presumably read only Rs[7:0], so amounts >= 256 wrap - confirm.
+    }
+
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
 
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -846,6 +846,8 @@
     return true;
   }
 
+  enum ShiftBehavior getShiftBehavior() const override { return ShiftMasked; }
+
   bool shouldSplatInsEltVarIndex(EVT VT) const override;
 
   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
Index: test/CodeGen/AArch64/unguard-shifts.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/unguard-shifts.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; ModuleID = 'shr.c'
+source_filename = "shr.c"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; "x == 0" guards lshr -1, (32 - x). NOTE(review): confirm "lsr" by 32 really yields 0 on AArch64.
+define dso_local i32 @lshr(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    lsr w0, w9, w8
+; CHECK-NEXT:    ret
+  %2 = icmp eq i32 %0, 0
+  %3 = sub nsw i32 32, %0
+  %4 = lshr i32 -1, %3
+  %5 = select i1 %2, i32 0, i32 %4
+  ret i32 %5
+}
+
+; "x sgt 31" guards lshr -1, x; the expected output has no compare or select left.
+define dso_local i32 @lshr2(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    lsr w0, w8, w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 31
+  %3 = lshr i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; NOTE(review): body is identical to @lshr2 - was another predicate (e.g. ugt) intended?
+define dso_local i32 @lshr3(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    lsr w0, w8, w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 31
+  %3 = lshr i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; "x == 0" guards shl -1, (32 - x). NOTE(review): confirm "lsl" by 32 really yields 0 on AArch64.
+define dso_local i32 @shl(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    lsl w0, w9, w8
+; CHECK-NEXT:    ret
+  %2 = icmp eq i32 %0, 0
+  %3 = sub nsw i32 32, %0
+  %4 = shl i32 -1, %3
+  %5 = select i1 %2, i32 0, i32 %4
+  ret i32 %5
+}
+
+; "x sgt 31" guards shl -1, x; the expected output has no compare or select left.
+define dso_local i32 @shl2(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    lsl w0, w8, w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 31
+  %3 = shl i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; NOTE(review): body is identical to @shl2 - was another predicate (e.g. ugt) intended?
+define dso_local i32 @shl3(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    lsl w0, w8, w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 31
+  %3 = shl i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 7.0.1-8 (tags/RELEASE_701/final)"}
Index: test/CodeGen/ARM/unguard-shifts.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/unguard-shifts.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi | FileCheck %s
+; ModuleID = 'shr.c'
+source_filename = "shr.c"
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+; "x == 0" guards lshr -1, (32 - x); the expected output has no compare or select left.
+define dso_local i32 @lshr(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rsb r0, r0, #32
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsr r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp eq i32 %0, 0
+  %3 = sub nsw i32 32, %0
+  %4 = lshr i32 -1, %3
+  %5 = select i1 %2, i32 0, i32 %4
+  ret i32 %5
+}
+
+; "x sgt 31" guards lshr -1, x. NOTE(review): confirm amounts >= 256 still yield 0 here.
+define dso_local i32 @lshr2(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsr r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp sgt i32 %0, 31
+  %3 = lshr i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; NOTE(review): body is identical to @lshr2 - was another predicate (e.g. ugt) intended?
+define dso_local i32 @lshr3(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: lshr3:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsr r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp sgt i32 %0, 31
+  %3 = lshr i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; "x == 0" guards shl -1, (32 - x); the expected output has no compare or select left.
+define dso_local i32 @shl(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rsb r0, r0, #32
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsl r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp eq i32 %0, 0
+  %3 = sub nsw i32 32, %0
+  %4 = shl i32 -1, %3
+  %5 = select i1 %2, i32 0, i32 %4
+  ret i32 %5
+}
+
+; "x sgt 31" guards shl -1, x. NOTE(review): confirm amounts >= 256 still yield 0 here.
+define dso_local i32 @shl2(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsl r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp sgt i32 %0, 31
+  %3 = shl i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+; NOTE(review): body is identical to @shl2 - was another predicate (e.g. ugt) intended?
+define dso_local i32 @shl3(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: shl3:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mvn r1, #0
+; CHECK-NEXT:    lsl r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %2 = icmp sgt i32 %0, 31
+  %3 = shl i32 -1, %0
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+armv4t,+soft-float,+strict-align,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-neon,-thumb-mode,-vfp2,-vfp3,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 7.0.1-8 (tags/RELEASE_701/final)"}