Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3379,7 +3379,7 @@
 // we should move shifts to the same hand of 'and', i.e. rewrite as
 // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
 // We are only interested in opposite logical shifts here.
-// One of the shifts can be truncated. For now, it can only be 'shl'.
+// One of the shifts can be truncated.
 // If we can, we want to end up creating 'lshr' shift.
 static Value *
 foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
@@ -3413,14 +3413,6 @@
          "We did not look past any shifts while matching XShift though.");
   bool HadTrunc = WidestTy != I.getOperand(0)->getType();

-  if (HadTrunc) {
-    // We did indeed have a truncation. For now, let's only proceed if the 'shl'
-    // was truncated, since that does not require any extra legality checks.
-    // FIXME: trunc-of-lshr.
-    if (!match(YShift, m_Shl(m_Value(), m_Value())))
-      return nullptr;
-  }
-
   // If YShift is a 'lshr', swap the shifts around.
   if (match(YShift, m_LShr(m_Value(), m_Value())))
     std::swap(XShift, YShift);
@@ -3462,16 +3454,68 @@
                       /*isNUW=*/false, SQ.getWithInstruction(&I)));
   if (!NewShAmt)
     return nullptr;
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+  unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
+
   // Is the new shift amount smaller than the bit width?
   // FIXME: could also rely on ConstantRange.
-  if (!match(NewShAmt, m_SpecificInt_ICMP(
-                           ICmpInst::Predicate::ICMP_ULT,
-                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
-                                 WidestTy->getScalarSizeInBits()))))
-    return nullptr;
+  if (!match(NewShAmt,
+             m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+                                APInt(WidestBitWidth, WidestBitWidth))))
+    return nullptr;
+
+  // An extra legality check is needed if we had trunc-of-lshr.
+  if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
+    auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
+                    WidestShift]() {
+      // It isn't obvious whether it's worth it to analyze non-constants here.
+      // Also, let's basically give up on non-splat cases, pessimizing vectors.
+      // If *any* of these preconditions matches we can perform the fold.
+      Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
+                                    ? NewShAmt->getSplatValue()
+                                    : NewShAmt;
+      // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
+      if (NewShAmtSplat &&
+          (NewShAmtSplat->isNullValue() ||
+           NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
+        return true;
+      // We consider *min* leading zeros so a single outlier
+      // blocks the transform as opposed to allowing it.
+      if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the value being shifted has at most lowest bit set we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition: NewShAmt u<= countLeadingZeros(C)
+        if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
+          return true;
+      }
+      if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
+        KnownBits Known = computeKnownBits(C, SQ.DL);
+        unsigned MinLeadZero = Known.countMinLeadingZeros();
+        // If the value being shifted has at most lowest bit set we can fold.
+        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
+        if (MaxActiveBits <= 1)
+          return true;
+        // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
+        if (NewShAmtSplat) {
+          APInt AdjNewShAmt =
+              (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
+          if (AdjNewShAmt.ule(MinLeadZero))
+            return true;
+        }
+      }
+      return false; // Can't tell if it's ok.
+    };
+    if (!CanFold())
+      return nullptr;
+  }
+
   // All good, we can do this fold.
-  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
   X = Builder.CreateZExt(X, WidestTy);
+  Y = Builder.CreateZExt(Y, WidestTy);
   // The shift is the same that was for X.
   Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
                   ? Builder.CreateLShr(X, NewShAmt)
Index: llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
+++ llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
@@ -42,15 +42,9 @@
 ; New shift amount would be 16, %x has 16 leading zeros - can fold.
 define i1 @t1(i64 %y, i32 %len) {
 ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 65535, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 4294901760
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 65535, %t0
@@ -65,15 +59,9 @@
 ; Note that we indeed look at leading zeros!
 define i1 @t1_single_bit(i64 %y, i32 %len) {
 ; CHECK-LABEL: @t1_single_bit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 32768, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 2147483648
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 32768, %t0
@@ -112,15 +100,9 @@
 ; New shift amount would be 16, %y has 47 leading zeros - can fold.
 define i1 @t3(i32 %x, i32 %len) {
 ; CHECK-LABEL: @t3(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 131071, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -135,15 +117,9 @@
 ; Note that we indeed look at leading zeros!
 define i1 @t3_singlebit(i32 %x, i32 %len) {
 ; CHECK-LABEL: @t3_singlebit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 65536, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -189,15 +165,10 @@
 ; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok.
 define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t5_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> , [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]],
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = sub <2 x i32> , %len
   %t1 = shl <2 x i32> , %t0
@@ -236,15 +207,9 @@
 ; New shift amount would be 16, minimal count of leading zeros in %x is 47. Ok.
 define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) {
 ; CHECK-LABEL: @t7_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]],
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %t0 = sub <2 x i32> , %len
   %t1 = shl <2 x i32> %x, %t0
@@ -285,15 +250,11 @@
 ; Ok if the final shift amount is exactly one less than widest bit width.
 define i1 @t9_highest_bit(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @t9_highest_bit(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 64, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[Y:%.*]], 63
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = sub i32 64, %len
   %t1 = shl i32 %x, %t0
@@ -332,15 +293,10 @@
 ; Ok if the final shift amount is zero.
 define i1 @t11_no_shift(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @t11_no_shift(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 64, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -64
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = sub i32 64, %len
   %t1 = shl i32 %x, %t0
@@ -427,15 +383,9 @@
 ; Ok if one of the values being shifted is 1
 define i1 @t13_x_is_one(i64 %y, i32 %len) {
 ; CHECK-LABEL: @t13_x_is_one(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 1, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y:%.*]], 65536
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 1, %t0
@@ -449,15 +399,7 @@
 }
 define i1 @t14_x_is_one(i32 %x, i32 %len) {
 ; CHECK-LABEL: @t14_x_is_one(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -16
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 1, [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t0 = sub i32 32, %len
   %t1 = shl i32 %x, %t0
@@ -472,15 +414,10 @@

 define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t15_vec_x_is_one_or_zero(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> , [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]],
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]],
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = sub <2 x i32> , %len
   %t1 = shl <2 x i32> , %t0
@@ -494,15 +431,7 @@
 }
 define <2 x i1> @t16_vec_y_is_one_or_zero(<2 x i32> %x, <2 x i32> %len) {
 ; CHECK-LABEL: @t16_vec_y_is_one_or_zero(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]],
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %t0 = sub <2 x i32> , %len
   %t1 = shl <2 x i32> %x, %t0
@@ -524,15 +453,8 @@
 ; And that's the main motivational pattern:
 define i1 @rawspeed_signbit(i64 %storage, i32 %nbits) {
 ; CHECK-LABEL: @rawspeed_signbit(
-; CHECK-NEXT:    [[SKIPNBITS:%.*]] = sub nsw i32 64, [[NBITS:%.*]]
-; CHECK-NEXT:    [[SKIPNBITSWIDE:%.*]] = zext i32 [[SKIPNBITS]] to i64
-; CHECK-NEXT:    [[DATAWIDE:%.*]] = lshr i64 [[STORAGE:%.*]], [[SKIPNBITSWIDE]]
-; CHECK-NEXT:    [[DATA:%.*]] = trunc i64 [[DATAWIDE]] to i32
-; CHECK-NEXT:    [[NBITSMINUSONE:%.*]] = add nsw i32 [[NBITS]], -1
-; CHECK-NEXT:    [[BITMASK:%.*]] = shl i32 1, [[NBITSMINUSONE]]
-; CHECK-NEXT:    [[BITMASKED:%.*]] = and i32 [[BITMASK]], [[DATA]]
-; CHECK-NEXT:    [[ISBITUNSET:%.*]] = icmp eq i32 [[BITMASKED]], 0
-; CHECK-NEXT:    ret i1 [[ISBITUNSET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[STORAGE:%.*]], -1
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %skipnbits = sub nsw i32 64, %nbits
   %skipnbitswide = zext i32 %skipnbits to i64
Index: llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
+++ llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
@@ -6,8 +6,8 @@
 ; we should move shifts to the same hand of 'and', i.e. e.g. rewrite as
 ; icmp eq/ne (and (((x shift Q) shift K), y)), 0
 ; We are only interested in opposite logical shifts here.
-; We still can handle the case where there is a truncation between a shift
-; and an 'and', but for now only if it's 'shl' - simpler legality check.
+; We still can handle the case where there is a truncation between a shift and
+; an 'and'. If it's trunc-of-shl - no extra legality check is needed.
 ;-------------------------------------------------------------------------------
 ; Basic scalar tests