diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -630,6 +630,35 @@
     return &I;
   }
 
+  /// A combiner-aware RAUW-like routine.
+  ///
+  /// This method is to be used when an instruction is found to be partially
+  /// dead, where selected uses can be adjusted to use another preexisting
+  /// expression. Here we add *all* uses of I to the worklist,
+  /// conditionally replace all uses of I with the new value, then return I,
+  /// so that the inst combiner will know that I was modified.
+  Instruction *
+  replaceInstUsesWithIf(Instruction &I, Value *V,
+                        llvm::function_ref<bool(Use &U)> ShouldReplace) {
+    // If there are no uses to replace, then we return nullptr to indicate that
+    // no changes were made to the program.
+    if (I.use_empty())
+      return nullptr;
+
+    Worklist.AddUsersToWorkList(I); // Add *all* users to the worklist.
+
+    // If we are replacing the instruction with itself, this must be in a
+    // segment of unreachable code, so just clobber the instruction.
+    if (&I == V)
+      V = UndefValue::get(I.getType());
+
+    LLVM_DEBUG(dbgs() << "IC: Conditionally replacing " << I << "\n"
+                      << "    with " << *V << '\n');
+
+    I.replaceUsesWithIf(V, ShouldReplace);
+    return &I;
+  }
+
   /// Creates a result tuple for an overflow intrinsic \p II with a given
   /// \p Result and a constant \p Overflow value.
   Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -241,11 +241,23 @@
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   assert(Op0->getType() == Op1->getType());
 
-  // If the shift amount is a one-use `sext`, we can demote it to `zext`.
+  // If the shift amount is a `sext`, we can demote it to `zext`.
+  // We don't limit this to single-use `sext`, or ensure that we can change
+  // all uses of the `sext` to `zext`, since we believe that `zext` is *much*
+  // better for further analysis. We also perform pseudo-CSE to avoid producing
+  // multiple `zext`s per eligible user and obscuring the fact that their
+  // shift amounts are actually the same Value. This does not help, however,
+  // if there was a preexisting identical `zext`; that one is left in place.
   Value *Y;
-  if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
-    Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName());
-    return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
+  if (isa<Instruction>(Op1) && match(Op1, m_SExt(m_Value(Y)))) {
+    BuilderTy::InsertPointGuard Guard(Builder);
+    Instruction *OldExt = cast<Instruction>(Op1);
+    Builder.SetInsertPoint(OldExt);
+    Value *NewExt = Builder.CreateZExt(Y, I.getType(), Y->getName() + ".zext");
+    replaceInstUsesWithIf(*OldExt, NewExt, [](Use &U) {
+      return match(U.getUser(), m_Shift(m_Value(), m_Value()));
+    });
+    return &I;
   }
 
   // See if we can fold away this shift.
diff --git a/llvm/test/Transforms/InstCombine/shift-by-signext.ll b/llvm/test/Transforms/InstCombine/shift-by-signext.ll
--- a/llvm/test/Transforms/InstCombine/shift-by-signext.ll
+++ b/llvm/test/Transforms/InstCombine/shift-by-signext.ll
@@ -6,8 +6,8 @@
 
 define i32 @t0_shl(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t0_shl(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -16,8 +16,8 @@
 }
 define i32 @t1_lshr(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t1_lshr(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -26,8 +26,8 @@
 }
 define i32 @t2_ashr(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t2_ashr(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -37,8 +37,8 @@
 
 define <2 x i32> @t3_vec_shl(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t3_vec_shl(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
@@ -47,8 +47,8 @@
 }
 define <2 x i32> @t4_vec_lshr(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t4_vec_lshr(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
@@ -57,8 +57,8 @@
 }
 define <2 x i32> @t5_vec_ashr(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t5_vec_ashr(
-; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
@@ -69,13 +69,13 @@
 define i32 @t6_twoshifts(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t6_twoshifts(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
 ; CHECK-NEXT:    br label [[WORK:%.*]]
 ; CHECK:       work:
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
-; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_ZEXT]]
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 bb:
@@ -89,6 +89,21 @@
   ret i32 %r
 }
 
+define i32 @t6_twoshifts_2(i32 %x, i8 %shamt) {
+; CHECK-LABEL: @t6_twoshifts_2(
+; CHECK-NEXT:    [[SHAMT_WIDE_UNSIGNED:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT]] to i32
+; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE_UNSIGNED]]
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_ZEXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shamt_wide_unsigned = zext i8 %shamt to i32
+  %shamt_wide_signed = sext i8 %shamt to i32
+  %n0 = shl i32 %x, %shamt_wide_unsigned
+  %r = ashr i32 %n0, %shamt_wide_signed
+  ret i32 %r
+}
+
 ; This is not valid for funnel shifts in general
 declare i7 @llvm.fshl.i7(i7 %a, i7 %b, i7 %c)
 declare i7 @llvm.fshr.i7(i7 %a, i7 %b, i7 %c)
@@ -139,9 +154,10 @@
 declare void @use32(i32)
 define i32 @n11_extrause(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @n11_extrause(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[SHAMT_WIDE]])
-; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -151,13 +167,14 @@
 }
 define i32 @n12_twoshifts_and_extrause(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @n12_twoshifts_and_extrause(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_ZEXT:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT]] to i32
 ; CHECK-NEXT:    br label [[WORK:%.*]]
 ; CHECK:       work:
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
-; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_ZEXT]]
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_ZEXT]]
 ; CHECK-NEXT:    call void @use32(i32 [[SHAMT_WIDE]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll
--- a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll
+++ b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll
@@ -29,9 +29,9 @@
 
 define i32 @two_shifts_by_sext(i32 %val, i8 signext %len) {
 ; CHECK-LABEL: @two_shifts_by_sext(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
+; CHECK-NEXT:    [[LEN_ZEXT:%.*]] = zext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[LEN_ZEXT]]
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[LEN_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[SHR]]
 ;
   %val.addr = alloca i32, align 4
@@ -50,9 +50,9 @@
 
 define i32 @two_shifts_by_same_sext(i32 %val, i8 signext %len) {
 ; CHECK-LABEL: @two_shifts_by_same_sext(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
+; CHECK-NEXT:    [[LEN_ZEXT:%.*]] = zext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[LEN_ZEXT]]
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[LEN_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[SHR]]
 ;
   %val.addr = alloca i32, align 4
@@ -73,10 +73,11 @@
 
 define i32 @two_shifts_by_sext_with_extra_use(i32 %val, i8 signext %len) {
 ; CHECK-LABEL: @two_shifts_by_sext_with_extra_use(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[LEN_ZEXT:%.*]] = zext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN]] to i32
 ; CHECK-NEXT:    tail call void @use_int32(i32 [[CONV]])
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[LEN_ZEXT]]
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[LEN_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[SHR]]
 ;
   %val.addr = alloca i32, align 4
@@ -100,10 +101,11 @@
 
 define i32 @two_shifts_by_same_sext_with_extra_use(i32 %val, i8 signext %len) {
 ; CHECK-LABEL: @two_shifts_by_same_sext_with_extra_use(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[LEN_ZEXT:%.*]] = zext i8 [[LEN:%.*]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[LEN]] to i32
 ; CHECK-NEXT:    tail call void @use_int32(i32 [[CONV]])
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
-; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[LEN_ZEXT]]
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[SHL]], [[LEN_ZEXT]]
 ; CHECK-NEXT:    ret i32 [[SHR]]
 ;
   %val.addr = alloca i32, align 4
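A minimal illustrative sketch of the fold these tests exercise (not part of the patch itself; the value names are hypothetical and merely mirror @t6_twoshifts above). Since a shift amount equal to or larger than the operand's bit width already yields poison, sign- and zero-extension of the amount are interchangeable for the shift users, so the combine rewrites

  %shamt_wide = sext i8 %shamt to i32
  %n0 = shl i32 %x, %shamt_wide
  %r = ashr i32 %n0, %shamt_wide

into a single zero-extension created at the old sext's position, with only the shift users rewired to it (any non-shift user, as in @n11_extrause, keeps the original sext):

  %shamt.zext = zext i8 %shamt to i32
  %n0 = shl i32 %x, %shamt.zext
  %r = ashr i32 %n0, %shamt.zext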