Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -1314,6 +1314,25 @@ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; + // If any bits in the shift amount make that value greater than or equal to + // the number of bits in the type, the shift is undefined. + unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (KnownOne.getLimitedValue() >= BitWidth) + return UndefValue::get(Op0->getType()); + + // If all valid bits in the shift amount are known zero, the first operand is + // unchanged. Don't try to take the log2 of 1 though; the log calculation + // doesn't do what we want. + if (BitWidth > 1) { + unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); + APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits); + if ((KnownZero & ShiftAmountMask) == ShiftAmountMask) + return Op0; + } + return nullptr; } Index: test/Transforms/InstSimplify/shift-knownbits.ll =================================================================== --- test/Transforms/InstSimplify/shift-knownbits.ll +++ test/Transforms/InstSimplify/shift-knownbits.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s + +; If any bits of the shift amount are known to make it exceed or equal +; the number of bits in the type, the shift causes undefined behavior. + +define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) { +; CHECK-LABEL: @shl_amount_is_known_bogus( +; CHECK-NEXT: ret i32 undef +; + %or = or i32 %b, 32 + %shl = shl i32 %a, %or + ret i32 %shl +} + +; Check some weird types and the other shift ops. + +define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) { +; CHECK-LABEL: @lshr_amount_is_known_bogus( +; CHECK-NEXT: ret i31 undef +; + %or = or i31 %b, 31 + %shr = lshr i31 %a, %or + ret i31 %shr +} + +define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) { +; CHECK-LABEL: @ashr_amount_is_known_bogus( +; CHECK-NEXT: ret i33 undef +; + %or = or i33 %b, 33 + %shr = ashr i33 %a, %or + ret i33 %shr +} + + +; If all valid bits of the shift amount are known 0, there's no shift. +; It doesn't matter if high bits are set because that would be undefined. +; Therefore, the only possible valid result of these shifts is %a. + +define i16 @ashr_amount_is_zero(i16 %a, i16 %b) { +; CHECK-LABEL: @ashr_amount_is_zero( +; CHECK-NEXT: ret i16 %a +; + %and = and i16 %b, 65520 ; 0xfff0 + %shr = ashr i16 %a, %and + ret i16 %shr +} + +define i300 @lshr_amount_is_zero(i300 %a, i300 %b) { +; CHECK-LABEL: @lshr_amount_is_zero( +; CHECK-NEXT: ret i300 %a +; + %and = and i300 %b, 2048 + %shr = lshr i300 %a, %and + ret i300 %shr +} + +define i9 @shl_amount_is_zero(i9 %a, i9 %b) { +; CHECK-LABEL: @shl_amount_is_zero( +; CHECK-NEXT: ret i9 %a +; + %and = and i9 %b, 496 ; 0x1f0 + %shl = shl i9 %a, %and + ret i9 %shl +} + + +; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type. + +define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) { +; CHECK-LABEL: @shl_amount_is_not_known_zero( +; CHECK-NEXT: [[AND:%.*]] = and i9 %b, -8 +; CHECK-NEXT: [[SHL:%.*]] = shl i9 %a, [[AND]] +; CHECK-NEXT: ret i9 [[SHL]] +; + %and = and i9 %b, 504 ; 0x1f8 + %shl = shl i9 %a, %and + ret i9 %shl +} + + +; For vectors, we need all scalar elements to meet the requirements to optimize. + +define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @ashr_vector_bogus( +; CHECK-NEXT: ret <2 x i32> undef +; + %or = or <2 x i32> %b, + %shr = ashr <2 x i32> %a, %or + ret <2 x i32> %shr +} + +; FIXME: This is undef, but computeKnownBits doesn't handle the union. +define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @shl_vector_bogus( +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> %b, +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> %a, [[OR]] +; CHECK-NEXT: ret <2 x i32> [[SHL]] +; + %or = or <2 x i32> %b, + %shl = shl <2 x i32> %a, %or + ret <2 x i32> %shl +} + +define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @lshr_vector_zero( +; CHECK-NEXT: ret <2 x i32> %a +; + %and = and <2 x i32> %b, + %shr = lshr <2 x i32> %a, %and + ret <2 x i32> %shr +} + +; FIXME: This is not a shift, but computeKnownBits doesn't handle weird vector types. +define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) { +; CHECK-LABEL: @shl_vector_zero( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i15> %b, +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i15> %a, [[AND]] +; CHECK-NEXT: ret <2 x i15> [[SHL]] +; + %and = and <2 x i15> %b, + %shl = shl <2 x i15> %a, %and + ret <2 x i15> %shl +} + +define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @shl_vector_for_real( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %b, +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> %a, [[AND]] +; CHECK-NEXT: ret <2 x i32> [[SHL]] +; + %and = and <2 x i32> %b, ; a necessary mask op + %shl = shl <2 x i32> %a, %and + ret <2 x i32> %shl +} + + +; Don't get tripped up by log2 of 1. + +define i1 @shl_i1(i1 %a, i1 %b) { +; CHECK-LABEL: @shl_i1( +; CHECK-NEXT: [[SHL:%.*]] = shl i1 %a, %b +; CHECK-NEXT: ret i1 [[SHL]] +; + %shl = shl i1 %a, %b + ret i1 %shl +} +