Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -346,25 +346,28 @@ } break; } - case Instruction::Shl: + case Instruction::Shl: { // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *Amt; + if (match(I->getOperand(1), m_APInt(Amt))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (CI->getLimitedValue(BitWidth) < BitWidth) + if (Amt->getLimitedValue(BitWidth) < BitWidth) return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } break; + } case Instruction::LShr: // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *Amt; + if (match(I->getOperand(1), m_APInt(Amt))) { uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); uint32_t BitWidth = Ty->getScalarSizeInBits(); if (IC.MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && - CI->getLimitedValue(BitWidth) < BitWidth) { + Amt->getLimitedValue(BitWidth) < BitWidth) { return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } } @@ -936,10 +939,11 @@ // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - case Instruction::Shl: + case Instruction::Shl: { // We can promote shl(x, cst) if we can promote x. Since shl overwrites the // upper bits we can reduce BitsToClear by the shift amount. 
- if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *Amt; + if (match(I->getOperand(1), m_APInt(Amt))) { if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; uint64_t ShiftAmt = Amt->getZExtValue(); @@ -947,10 +951,12 @@ return true; } return false; - case Instruction::LShr: + } + case Instruction::LShr: { // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. - if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *Amt; + if (match(I->getOperand(1), m_APInt(Amt))) { if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; BitsToClear += Amt->getZExtValue(); @@ -960,6 +966,7 @@ } // Cannot promote variable LSHR. return false; + } case Instruction::Select: if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || Index: test/Transforms/InstCombine/cast.ll =================================================================== --- test/Transforms/InstCombine/cast.ll +++ test/Transforms/InstCombine/cast.ll @@ -492,6 +492,21 @@ ret i16 %tmp.upgrd.3 } +define <2 x i16> @test40vec(<2 x i16> %a) { +; CHECK-LABEL: @test40vec( +; CHECK-NEXT: [[TMP21:%.*]] = lshr <2 x i16> [[A:%.*]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i16> [[A]], +; CHECK-NEXT: [[TMP_UPGRD_32:%.*]] = or <2 x i16> [[TMP21]], [[TMP5]] +; CHECK-NEXT: ret <2 x i16> [[TMP_UPGRD_32]] +; + %tmp = zext <2 x i16> %a to <2 x i32> + %tmp21 = lshr <2 x i32> %tmp, + %tmp5 = shl <2 x i32> %tmp, + %tmp.upgrd.32 = or <2 x i32> %tmp21, %tmp5 + %tmp.upgrd.3 = trunc <2 x i32> %tmp.upgrd.32 to <2 x i16> + ret <2 x i16> %tmp.upgrd.3 +} + ; PR1263 define i32* @test41(i32* %tmp1) { ; CHECK-LABEL: @test41( @@ -585,6 +600,19 @@ ret i64 %E } +define <2 x i64> @test46vec(<2 x i64> %A) { +; CHECK-LABEL: @test46vec( +; CHECK-NEXT: [[C:%.*]] = shl <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = 
and <2 x i64> [[C]], +; CHECK-NEXT: ret <2 x i64> [[D]] +; + %B = trunc <2 x i64> %A to <2 x i32> + %C = and <2 x i32> %B, + %D = shl <2 x i32> %C, + %E = zext <2 x i32> %D to <2 x i64> + ret <2 x i64> %E +} + define i64 @test47(i8 %A) { ; CHECK-LABEL: @test47( ; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 42 @@ -729,6 +757,19 @@ ret i64 %tmp355 } +define <2 x i64> @test56vec(<2 x i16> %A) nounwind { +; CHECK-LABEL: @test56vec( +; CHECK-NEXT: [[TMP353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP354:%.*]] = lshr <2 x i64> [[TMP353]], +; CHECK-NEXT: [[TMP355:%.*]] = and <2 x i64> [[TMP354]], +; CHECK-NEXT: ret <2 x i64> [[TMP355]] +; + %tmp353 = sext <2 x i16> %A to <2 x i32> + %tmp354 = lshr <2 x i32> %tmp353, + %tmp355 = zext <2 x i32> %tmp354 to <2 x i64> + ret <2 x i64> %tmp355 +} + define i64 @test57(i64 %A) nounwind { ; CHECK-LABEL: @test57( ; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 8 @@ -741,6 +782,18 @@ ret i64 %E } +define <2 x i64> @test57vec(<2 x i64> %A) nounwind { +; CHECK-LABEL: @test57vec( +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[C]], +; CHECK-NEXT: ret <2 x i64> [[E]] +; + %B = trunc <2 x i64> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %E = zext <2 x i32> %C to <2 x i64> + ret <2 x i64> %E +} + define i64 @test58(i64 %A) nounwind { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 8 Index: test/Transforms/InstCombine/select-with-bitwise-ops.ll =================================================================== --- test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -410,12 +410,10 @@ define <2 x i32> @test71vec(<2 x i32> %x) { ; CHECK-LABEL: @test71vec( -; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], -; CHECK-NEXT: 
[[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i32> %x, %2 = icmp ne <2 x i32> %1, @@ -438,12 +436,10 @@ define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i32> %x, %2 = icmp eq <2 x i32> %1, @@ -466,12 +462,10 @@ define <2 x i32> @test73vec(<2 x i32> %x) { ; CHECK-LABEL: @test73vec( -; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = trunc <2 x i32> %x to <2 x i8> %2 = icmp sgt <2 x i8> %1,