Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -2196,6 +2196,16 @@
     if (Tmp == 1) return 1;  // Early out.
     return std::min(Tmp, Tmp2)-1;
 
+  case Instruction::Mul: {
+    // The output of the Mul can be at most twice the valid bits in the inputs.
+    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+    if (Tmp2 == 1) return 1;
+    unsigned OutValidBits = (TyBits - Tmp + 1) + (TyBits - Tmp2 + 1);
+    return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
+  }
+
   case Instruction::PHI: {
     const PHINode *PN = cast<PHINode>(U);
     unsigned NumIncomingValues = PN->getNumIncomingValues();
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -369,6 +369,21 @@
       }
     }
     break;
+  case Instruction::AShr:
+    // If this is a truncate of an arithmetic shr, we can truncate it to a
+    // smaller ashr iff we know that all the bits from the sign bit of the
+    // original type and the sign bit of the truncate type are similar.
+    // TODO: It is enough to check that the bits we would be shifting in are
+    //       similar to the sign bit of the truncate type.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (CI->getLimitedValue(BitWidth) < BitWidth &&
+          OrigBitWidth - BitWidth <
+              IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
+        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+    }
+    break;
   case Instruction::Trunc:
     // trunc(trunc(x)) -> trunc(x)
     return true;
Index: lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -560,7 +560,9 @@
     }
 
     // (X >>u C) << C --> X & (-1 << C)
-    if (match(Op0, m_LShr(m_Value(X), m_Specific(Op1)))) {
+    // (X >>s C) << C --> X & (-1 << C)
+    if (match(Op0, m_LShr(m_Value(X), m_Specific(Op1))) ||
+        match(Op0, m_AShr(m_Value(X), m_Specific(Op1)))) {
       APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -465,7 +465,7 @@
   case Instruction::LShr: {
     const APInt *SA;
     if (match(I->getOperand(1), m_APInt(SA))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
 
       // Unsigned shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
@@ -521,9 +521,12 @@
       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
         return I;
 
+      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), 0, CxtI);
+
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       // Compute the new bits that are at the top now.
-      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+      APInt HighBits(APInt::getHighBitsSet(
+          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
       Known.Zero.lshrInPlace(ShiftAmt);
       Known.One.lshrInPlace(ShiftAmt);
 
Index: test/Transforms/InstCombine/shift.ll
===================================================================
--- test/Transforms/InstCombine/shift.ll
+++ test/Transforms/InstCombine/shift.ll
@@ -221,6 +221,19 @@
   ret i32 %C
 }
 
+;; ((A | 0xC0000000) >> 8) << 8 === (A & 0x3FFFFF00) | 0xC0000000
+define i32 @test12a(i32 %A) {
+; CHECK-LABEL: @test12a(
+; CHECK-NEXT:    [[a:%.*]] = and i32 %A, 1073741568
+; CHECK-NEXT:    [[C:%.*]] = or i32 [[a]], -1073741824
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %a = or i32 %A, -1073741824
+  %B = ashr i32 %a, 8
+  %C = shl i32 %B, 8
+  ret i32 %C
+}
+
 ;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & -8) * 2
 ;; The shl may be valuable to scalar evolution.
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -77,6 +77,17 @@
   ret i32 %D
 }
 
+define i32 @test5_ashr(i32 %A) {
+; CHECK-LABEL: @test5_ashr(
+; CHECK-NEXT:    [[C:%.*]] = ashr i32 %A, 16
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = sext i32 %A to i128
+  %C = ashr i128 %B, 16
+  %D = trunc i128 %C to i32
+  ret i32 %D
+}
+
 define i32 @test6(i64 %A) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[C:%.*]] = lshr i64 %A, 32
@@ -89,6 +100,44 @@
   ret i32 %D
 }
 
+define i32 @test6_lshr(i64 %A) {
+; CHECK-LABEL: @test6_lshr(
+; CHECK-NEXT:    [[C:%.*]] = lshr i64 %A, 32
+; CHECK-NEXT:    [[D:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 32
+  %D = trunc i128 %C to i32
+  ret i32 %D
+}
+
+define i64 @test6_ashr(i64 %A) {
+; CHECK-LABEL: @test6_ashr(
+; CHECK-NEXT:    [[C:%.*]] = ashr i64 %A, 33
+; CHECK-NEXT:    ret i64 [[C]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 33
+  %D = trunc i128 %C to i64
+  ret i64 %D
+}
+
+define i16 @test6_ashr_mul(i8 %X, i8 %Y) {
+; CHECK-LABEL: @test6_ashr_mul(
+; CHECK-NEXT:    [[A:%.*]] = sext i8 %X to i16
+; CHECK-NEXT:    [[B:%.*]] = sext i8 %Y to i16
+; CHECK-NEXT:    [[C:%.*]] = mul nsw i16 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = ashr i16 %C, 15
+; CHECK-NEXT:    ret i16 %D
+  %A = sext i8 %X to i32
+  %B = sext i8 %Y to i32
+  %C = mul i32 %A, %B
+  %D = ashr i32 %C, 15
+  %E = trunc i32 %D to i16
+  ret i16 %E
+}
+
 define i92 @test7(i64 %A) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 %A, 32
@@ -101,6 +150,18 @@
   ret i92 %D
 }
 
+define i92 @test7_ashr(i64 %A) {
+; CHECK-LABEL: @test7_ashr(
+; CHECK-NEXT:    [[B:%.*]] = sext i64 %A to i92
+; CHECK-NEXT:    [[C:%.*]] = ashr i92 [[B]], 32
+; CHECK-NEXT:    ret i92 [[C]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 32
+  %D = trunc i128 %C to i92
+  ret i92 %D
+}
+
 define i64 @test8(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[TMP38:%.*]] = zext i32 %A to i64
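Not part of the patch: below is a minimal standalone C++ sketch of the sign-bit arithmetic the new Instruction::Mul case in ComputeNumSignBits performs, using the patch's own formula. The concrete numbers mirror @test6_ashr_mul above: two i8 values sign-extended to i32 carry 25 sign bits each, so their product still has 17 known sign bits, which is what allows the ashr/trunc pair to be evaluated in i16. The helper name numSignBitsOfMul is made up for illustration and is not an LLVM API.

#include <cstdio>

// Mirrors the new Instruction::Mul case: each operand contributes
// TyBits - NumSignBits + 1 "valid" (non-sign) bits, the product needs at
// most the sum of the two, and everything above that is a known sign bit.
unsigned numSignBitsOfMul(unsigned TyBits, unsigned Tmp, unsigned Tmp2) {
  if (Tmp == 1 || Tmp2 == 1)
    return 1; // Early out, as in the patch.
  unsigned OutValidBits = (TyBits - Tmp + 1) + (TyBits - Tmp2 + 1);
  return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
}

int main() {
  // As in test6_ashr_mul: i8 operands sign-extended to i32 have
  // 32 - 8 + 1 = 25 sign bits each.
  unsigned SignBits = numSignBitsOfMul(/*TyBits=*/32, /*Tmp=*/25, /*Tmp2=*/25);
  std::printf("sign bits of the i32 product: %u\n", SignBits); // prints 17
  // 17 sign bits means the whole expression fits in i16:
  // OrigBitWidth - BitWidth = 32 - 16 = 16 < 17, which is exactly the
  // condition the new AShr case of canEvaluateTruncated checks.
  return 0;
}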