diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -65,6 +65,7 @@
   case Instruction::Xor:
   case Instruction::Shl:
   case Instruction::LShr:
+  case Instruction::AShr:
     Ops.push_back(I->getOperand(0));
     Ops.push_back(I->getOperand(1));
     break;
@@ -133,6 +134,7 @@
     case Instruction::Xor:
     case Instruction::Shl:
     case Instruction::LShr:
+    case Instruction::AShr:
     case Instruction::Select: {
       SmallVector<Value *, 2> Operands;
       getRelevantOperands(I, Operands);
@@ -143,8 +145,7 @@
       // TODO: Can handle more cases here:
       // 1. shufflevector, extractelement, insertelement
       // 2. udiv, urem
-      // 3. ashr
-      // 4. phi node(and loop handling)
+      // 3. phi node(and loop handling)
       // ...
       return false;
     }
@@ -277,14 +278,18 @@
       CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
 
   // Initialize MinBitWidth for shift instructions with the minimum number
-  // that is greater than shift amount (i.e. shift amount + 1). For `lshr`
-  // adjust MinBitWidth so that all potentially truncated bits of
-  // the value-to-be-shifted are zeros.
-  // Also normalize MinBitWidth not to be greater than source bitwidth.
+  // that is greater than shift amount (i.e. shift amount + 1).
+  // For `lshr` adjust MinBitWidth so that all potentially truncated
+  // bits of the value-to-be-shifted are zeros.
+  // For `ashr` adjust MinBitWidth so that all potentially truncated
+  // bits of the value-to-be-shifted are sign bits (all zeros or ones)
+  // and even one (first) untruncated bit is sign bit.
+  // Exit early if MinBitWidth is not less than original bitwidth.
   for (auto &Itr : InstInfoMap) {
     Instruction *I = Itr.first;
     if (I->getOpcode() == Instruction::Shl ||
-        I->getOpcode() == Instruction::LShr) {
+        I->getOpcode() == Instruction::LShr ||
+        I->getOpcode() == Instruction::AShr) {
       KnownBits KnownRHS = computeKnownBits(I->getOperand(1), DL);
       unsigned MinBitWidth = KnownRHS.getMaxValue()
                                  .uadd_sat(APInt(OrigBitWidth, 1))
@@ -295,9 +300,13 @@
         KnownBits KnownLHS = computeKnownBits(I->getOperand(0), DL);
         MinBitWidth =
             std::max(MinBitWidth, KnownLHS.getMaxValue().getActiveBits());
-        if (MinBitWidth >= OrigBitWidth)
-          return nullptr;
       }
+      if (I->getOpcode() == Instruction::AShr) {
+        unsigned NumSignBits = ComputeNumSignBits(I->getOperand(0), DL);
+        MinBitWidth = std::max(MinBitWidth, OrigBitWidth - NumSignBits + 1);
+      }
+      if (MinBitWidth >= OrigBitWidth)
+        return nullptr;
       Itr.second.MinBitWidth = MinBitWidth;
     }
   }
@@ -390,12 +399,13 @@
     case Instruction::Or:
     case Instruction::Xor:
     case Instruction::Shl:
-    case Instruction::LShr: {
+    case Instruction::LShr:
+    case Instruction::AShr: {
       Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
       Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
       Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
       // Preserve `exact` flag since truncation doesn't change exactness
-      if (Opc == Instruction::LShr)
+      if (Opc == Instruction::LShr || Opc == Instruction::AShr)
         if (auto *ResI = dyn_cast<Instruction>(Res))
           ResI->setIsExact(I->isExact());
       break;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_ashr.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_ashr.ll
--- a/llvm/test/Transforms/AggressiveInstCombine/trunc_ashr.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_ashr.ll
@@ -19,10 +19,8 @@
 
 define i16 @ashr_sext_15(i16 %x) {
 ; CHECK-LABEL: @ashr_sext_15(
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 [[SEXT]], 15
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[ASHR]] to i16
-; CHECK-NEXT:    ret i16 [[TRUNC]]
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i16 [[X:%.*]], 15
+; CHECK-NEXT:    ret i16 [[ASHR]]
 ;
   %sext = sext i16 %x to i32
   %ashr = ashr i32 %sext, 15
@@ -68,14 +66,13 @@
 
 define i16 @ashr_var_bounded_shift_amount(i8 %x, i8 %amt) {
 ; CHECK-LABEL: @ashr_var_bounded_shift_amount(
-; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ZA:%.*]] = zext i8 [[AMT:%.*]] to i32
-; CHECK-NEXT:    [[ZA2:%.*]] = and i32 [[ZA]], 15
-; CHECK-NEXT:    [[S:%.*]] = ashr i32 [[Z]], [[ZA2]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[S]], [[Z]]
-; CHECK-NEXT:    [[S2:%.*]] = ashr i32 [[A]], 2
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[S2]] to i16
-; CHECK-NEXT:    ret i16 [[T]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[ZA:%.*]] = zext i8 [[AMT:%.*]] to i16
+; CHECK-NEXT:    [[ZA2:%.*]] = and i16 [[ZA]], 15
+; CHECK-NEXT:    [[S:%.*]] = ashr i16 [[Z]], [[ZA2]]
+; CHECK-NEXT:    [[A:%.*]] = add i16 [[S]], [[Z]]
+; CHECK-NEXT:    [[S2:%.*]] = ashr i16 [[A]], 2
+; CHECK-NEXT:    ret i16 [[S2]]
 ;
   %z = zext i8 %x to i32
   %za = zext i8 %amt to i32
@@ -108,16 +105,15 @@
 
 define void @ashr_big_dag(i16* %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: @ashr_big_dag(
-; CHECK-NEXT:    [[ZEXT1:%.*]] = zext i8 [[B:%.*]] to i32
-; CHECK-NEXT:    [[ZEXT2:%.*]] = zext i8 [[C:%.*]] to i32
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ZEXT1]], [[ZEXT2]]
-; CHECK-NEXT:    [[SFT1:%.*]] = and i32 [[ADD1]], 15
-; CHECK-NEXT:    [[SHR1:%.*]] = ashr i32 [[ADD1]], [[SFT1]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[SHR1]]
-; CHECK-NEXT:    [[SFT2:%.*]] = and i32 [[ADD2]], 7
-; CHECK-NEXT:    [[SHR2:%.*]] = ashr i32 [[ADD2]], [[SFT2]]
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR2]] to i16
-; CHECK-NEXT:    store i16 [[TRUNC]], i16* [[A:%.*]], align 2
+; CHECK-NEXT:    [[ZEXT1:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT:    [[ZEXT2:%.*]] = zext i8 [[C:%.*]] to i16
+; CHECK-NEXT:    [[ADD1:%.*]] = add i16 [[ZEXT1]], [[ZEXT2]]
+; CHECK-NEXT:    [[SFT1:%.*]] = and i16 [[ADD1]], 15
+; CHECK-NEXT:    [[SHR1:%.*]] = ashr i16 [[ADD1]], [[SFT1]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i16 [[ADD1]], [[SHR1]]
+; CHECK-NEXT:    [[SFT2:%.*]] = and i16 [[ADD2]], 7
+; CHECK-NEXT:    [[SHR2:%.*]] = ashr i16 [[ADD2]], [[SFT2]]
+; CHECK-NEXT:    store i16 [[SHR2]], i16* [[A:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
   %zext1 = zext i8 %b to i32
@@ -152,13 +148,12 @@
 
 define <2 x i16> @ashr_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @ashr_vector(
-; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[ZA:%.*]] = and <2 x i32> [[Z]],
-; CHECK-NEXT:    [[S:%.*]] = ashr <2 x i32> [[Z]], [[ZA]]
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[S]], [[Z]]
-; CHECK-NEXT:    [[S2:%.*]] = ashr <2 x i32> [[A]],
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i32> [[S2]] to <2 x i16>
-; CHECK-NEXT:    ret <2 x i16> [[T]]
+; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[ZA:%.*]] = and <2 x i16> [[Z]],
+; CHECK-NEXT:    [[S:%.*]] = ashr <2 x i16> [[Z]], [[ZA]]
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i16> [[S]], [[Z]]
+; CHECK-NEXT:    [[S2:%.*]] = ashr <2 x i16> [[A]],
+; CHECK-NEXT:    ret <2 x i16> [[S2]]
 ;
   %z = zext <2 x i8> %x to <2 x i32>
   %za = and <2 x i32> %z,
@@ -213,11 +208,9 @@
 
 define i16 @ashr_exact(i16 %x) {
 ; CHECK-LABEL: @ashr_exact(
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ZEXT]], 32767
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr exact i32 [[AND]], 15
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[ASHR]] to i16
-; CHECK-NEXT:    ret i16 [[TRUNC]]
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X:%.*]], 32767
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr exact i16 [[AND]], 15
+; CHECK-NEXT:    ret i16 [[ASHR]]
 ;
   %zext = zext i16 %x to i32
   %and = and i32 %zext, 32767
@@ -245,12 +238,10 @@
 
 define i16 @ashr_negative_operand_but_short(i16 %x) {
 ; CHECK-LABEL: @ashr_negative_operand_but_short(
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ZEXT]], 32767
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 -1, [[AND]]
-; CHECK-NEXT:    [[LSHR2:%.*]] = ashr i32 [[XOR]], 2
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[LSHR2]] to i16
-; CHECK-NEXT:    ret i16 [[TRUNC]]
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X:%.*]], 32767
+; CHECK-NEXT:    [[XOR:%.*]] = xor i16 -1, [[AND]]
+; CHECK-NEXT:    [[LSHR2:%.*]] = ashr i16 [[XOR]], 2
+; CHECK-NEXT:    ret i16 [[LSHR2]]
 ;
   %zext = zext i16 %x to i32
   %and = and i32 %zext, 32767