Diff 300977

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	struct PtrAddChain {
Register Base;		Register Base;
};		};

struct ShiftChain {		struct ShiftChain {
int64_t Imm;		int64_t Imm;
Register Base;		Register Base;
};		};

		/// Match data handed from matchShiftOfShiftedLogic to
		/// applyShiftOfShiftedLogic for the pattern
		///   %t1 = SHIFT %X, C0;  %t2 = LOGIC %t1, %Y;  %root = SHIFT %t2, C1
		struct ShiftOfShiftedLogic {
		/// The G_AND/G_OR/G_XOR instruction that defines the root shift's source.
		MachineInstr *Logic;
		/// The inner shift (%t1 above) that defines one operand of Logic. Despite
		/// the name, this is the first shift in program order, not the root.
		MachineInstr *Shift2;
		/// The operand of Logic that is not defined by the inner shift (%Y above).
		Register LogicNonShiftReg;
		/// Sum of the two constant shift amounts (C0 + C1).
		uint64_t ValSum;
		};

struct RegisterImmPair {		struct RegisterImmPair {
Register Reg;		Register Reg;
int64_t Imm;		int64_t Imm;
};		};

using OperandBuildSteps =		using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;		SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {		struct InstructionBuildSteps {
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines	public:
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);		bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);

bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);		bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);		bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);

bool matchShiftImmedChain(MachineInstr &MI, ShiftChain &MatchInfo);		bool matchShiftImmedChain(MachineInstr &MI, ShiftChain &MatchInfo);
bool applyShiftImmedChain(MachineInstr &MI, ShiftChain &MatchInfo);		bool applyShiftImmedChain(MachineInstr &MI, ShiftChain &MatchInfo);

		/// If we have a shift-by-constant of a bitwise logic op that itself has a
		/// shift-by-constant operand with identical opcode, we may be able to convert
		/// that into 2 independent shifts followed by the logic op. This is a
		/// throughput improvement.
		foad (inline comment, Not Done): I know you copied this comment from SelectionDAG, but I think it really improves latency, not throughput, because the new shifts are independent. Also, you don't mention the other reason for doing this, which is that one of the new shifts might constant-fold away.
		bool matchShiftOfShiftedLogic(MachineInstr &MI,
		ShiftOfShiftedLogic &MatchInfo);
		bool applyShiftOfShiftedLogic(MachineInstr &MI,
		ShiftOfShiftedLogic &MatchInfo);

/// Transform a multiply by a power-of-2 value to a left shift.		/// Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);		bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);		bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);

// Transform a G_SHL with an extended source into a narrower shift if		// Transform a G_SHL with an extended source into a narrower shift if
// possible.		// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);		bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
bool applyCombineShlOfExtend(MachineInstr &MI,		bool applyCombineShlOfExtend(MachineInstr &MI,
▲ Show 20 Lines • Show All 227 Lines • Show Last 20 Lines

llvm/include/llvm/Target/GlobalISel/Combine.td

Show First 20 Lines • Show All 159 Lines • ▼ Show 20 Lines

def shift_immed_matchdata : GIDefMatchData<"ShiftChain">;		def shift_immed_matchdata : GIDefMatchData<"ShiftChain">;
def shift_immed_chain : GICombineRule<		def shift_immed_chain : GICombineRule<
(defs root:$d, shift_immed_matchdata:$matchinfo),		(defs root:$d, shift_immed_matchdata:$matchinfo),
(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$d,		(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$d,
[{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]),		[{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>;		(apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>;

		// Combine rule rooted at a G_SHL/G_ASHR/G_LSHR instruction. Matching is
		// delegated to CombinerHelper::matchShiftOfShiftedLogic, which fills a
		// ShiftOfShiftedLogic match-data object; the rewrite is performed by
		// CombinerHelper::applyShiftOfShiftedLogic using that same object.
		def shift_of_shifted_logic_matchdata : GIDefMatchData<"ShiftOfShiftedLogic">;
		def shift_of_shifted_logic_chain : GICombineRule<
		(defs root:$d, shift_of_shifted_logic_matchdata:$matchinfo),
		(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$d,
		[{ return Helper.matchShiftOfShiftedLogic(*${d}, ${matchinfo}); }]),
		(apply [{ Helper.applyShiftOfShiftedLogic(*${d}, ${matchinfo}); }])>;

def mul_to_shl_matchdata : GIDefMatchData<"unsigned">;		def mul_to_shl_matchdata : GIDefMatchData<"unsigned">;
def mul_to_shl : GICombineRule<		def mul_to_shl : GICombineRule<
(defs root:$d, mul_to_shl_matchdata:$matchinfo),		(defs root:$d, mul_to_shl_matchdata:$matchinfo),
(match (G_MUL $d, $op1, $op2):$mi,		(match (G_MUL $d, $op1, $op2):$mi,
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),		[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;		(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;

// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int		// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
▲ Show 20 Lines • Show All 369 Lines • ▼ Show 20 Lines	def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
hoist_logic_op_with_same_opcode_hands,		hoist_logic_op_with_same_opcode_hands,
shl_ashr_to_sext_inreg, sext_inreg_of_load,		shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,		width_reduction_combines, select_combines,
known_bits_simplifications, ext_ext_fold,		known_bits_simplifications, ext_ext_fold,
not_cmp_fold, opt_brcond_by_inverting_cond,		not_cmp_fold, opt_brcond_by_inverting_cond,
unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,		unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,		unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,		const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain]>;		shift_immed_chain, shift_of_shifted_logic_chain]>;

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Show First 20 Lines • Show All 1,600 Lines • ▼ Show 20 Lines	bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
Register NewImm = MIB.buildConstant(ImmTy, Imm).getReg(0);		Register NewImm = MIB.buildConstant(ImmTy, Imm).getReg(0);
Observer.changingInstr(MI);		Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);		MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewImm);		MI.getOperand(2).setReg(NewImm);
Observer.changedInstr(MI);		Observer.changedInstr(MI);
return true;		return true;
}		}

		/// Match a shift-by-constant of a one-use bitwise logic op that has a
		/// one-use shift-by-constant operand with the same shift opcode.
		///
		/// \param MI        The candidate root shift instruction.
		/// \param MatchInfo Filled in on success with the logic instruction, the
		///                  inner shift, the logic op's other operand and C0 + C1.
		/// \returns true if the pattern matched and the fold is valid.
		bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
		                                              ShiftOfShiftedLogic &MatchInfo) {
		  // We're trying to match the following pattern with any of
		  // G_SHL/G_ASHR/G_LSHR shift instructions in combination with any of
		  // G_AND/G_OR/G_XOR logic instructions.
		  //   %t1 = SHIFT %X, G_CONSTANT C0
		  //   %t2 = LOGIC %t1, %Y
		  //   %root = SHIFT %t2, G_CONSTANT C1
		  // -->
		  //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
		  //   %t4 = SHIFT %Y, G_CONSTANT C1
		  //   %root = LOGIC %t3, %t4
		  // NOTE(review): saturating shifts (G_USHLSAT/G_SSHLSAT) are rejected here.
		  const unsigned ShiftOpcode = MI.getOpcode();
		  if (ShiftOpcode != TargetOpcode::G_SHL &&
		      ShiftOpcode != TargetOpcode::G_ASHR &&
		      ShiftOpcode != TargetOpcode::G_LSHR)
		    return false;

		  // Match a one-use bitwise logic op.
		  const Register LogicDest = MI.getOperand(1).getReg();
		  if (!MRI.hasOneNonDBGUse(LogicDest))
		    return false;

		  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
		  if (!LogicMI)
		    return false;
		  const unsigned LogicOpcode = LogicMI->getOpcode();
		  if (LogicOpcode != TargetOpcode::G_AND &&
		      LogicOpcode != TargetOpcode::G_OR &&
		      LogicOpcode != TargetOpcode::G_XOR)
		    return false;

		  // The root must shift by a constant.
		  const Register C1 = MI.getOperand(2).getReg();
		  auto MaybeC1 = getConstantVRegValWithLookThrough(C1, MRI);
		  if (!MaybeC1)
		    return false;
		  const uint64_t C1Val = MaybeC1->Value;

		  // Recognise the inner shift: same opcode as the root, a single use, and a
		  // constant shift amount. The type of its shift amount is irrelevant; only
		  // the amount's value matters.
		  auto matchFirstShift = [&](const MachineInstr *Candidate,
		                             uint64_t &ShiftVal) {
		    if (!Candidate || Candidate->getOpcode() != ShiftOpcode ||
		        !MRI.hasOneNonDBGUse(Candidate->getOperand(0).getReg()))
		      return false;

		    auto MaybeC0 = getConstantVRegValWithLookThrough(
		        Candidate->getOperand(2).getReg(), MRI);
		    if (!MaybeC0)
		      return false;

		    ShiftVal = MaybeC0->Value;
		    return true;
		  };

		  // Logic ops are commutative, so check each operand for a match.
		  const Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
		  const Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
		  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
		  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
		  uint64_t C0Val = 0;

		  if (matchFirstShift(LogicMIOp1, C0Val)) {
		    MatchInfo.LogicNonShiftReg = LogicMIReg2;
		    MatchInfo.Shift2 = LogicMIOp1;
		  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
		    MatchInfo.LogicNonShiftReg = LogicMIReg1;
		    MatchInfo.Shift2 = LogicMIOp2;
		  } else {
		    return false;
		  }

		  // The fold is only valid while the combined amount stays below the width
		  // of the value being shifted. That is the width of the logic result, not
		  // the width of the shift-amount type, which may differ from it. The
		  // comparison is arranged so that C0Val + C1Val cannot wrap.
		  const uint64_t ValueBits = MRI.getType(LogicDest).getScalarSizeInBits();
		  if (C1Val >= ValueBits || C0Val >= ValueBits - C1Val)
		    return false;
		  const uint64_t Sum = C0Val + C1Val;

		  // The apply step materializes the sum in the root's shift-amount type, so
		  // make sure it is representable there.
		  const unsigned AmtBits = MRI.getType(C1).getScalarSizeInBits();
		  if (AmtBits < 64 && (Sum >> AmtBits) != 0)
		    return false;

		  MatchInfo.Logic = LogicMI;
		  MatchInfo.ValSum = Sum;
		  return true;
		}

		/// Rewrite the pattern recognised by matchShiftOfShiftedLogic:
		///   %root = SHIFT (LOGIC (SHIFT %X, C0), %Y), C1
		/// becomes
		///   %root = LOGIC (SHIFT %X, C0+C1), (SHIFT %Y, C1)
		///
		/// \param MI        The root shift; erased by this function.
		/// \param MatchInfo Data produced by matchShiftOfShiftedLogic.
		/// \returns true (the rewrite is always performed).
		bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
		                                              ShiftOfShiftedLogic &MatchInfo) {
		  const unsigned Opcode = MI.getOpcode();
		  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
		          Opcode == TargetOpcode::G_LSHR) &&
		         "Expected G_SHL, G_ASHR or G_LSHR");

		  // NOTE(review): review feedback says CombinerHelper already owns a
		  // MachineIRBuilder; that member is not visible from here, so the local
		  // builder is kept. Switch to the member once its name is confirmed.
		  MachineIRBuilder MIB(MI);

		  const Register Dest = MI.getOperand(0).getReg();
		  const Register RootShiftAmt = MI.getOperand(2).getReg();
		  // The new shifts produce values of the root's result type. The
		  // shift-amount type is only used for the new constant, because shift
		  // amounts need not have the same type as the value being shifted.
		  const LLT DestTy = MRI.getType(Dest);
		  const LLT ShiftAmtTy = MRI.getType(RootShiftAmt);

		  // %t3 = SHIFT %X, (C0 + C1)
		  Register SumAmt = MIB.buildConstant(ShiftAmtTy, MatchInfo.ValSum).getReg(0);
		  Register InnerBase = MatchInfo.Shift2->getOperand(1).getReg();
		  Register ShiftedX =
		      MIB.buildInstr(Opcode, {DestTy}, {InnerBase, SumAmt}).getReg(0);

		  // %t4 = SHIFT %Y, C1 (reuses the root's existing shift-amount register).
		  Register ShiftedY =
		      MIB.buildInstr(Opcode, {DestTy},
		                     {MatchInfo.LogicNonShiftReg, RootShiftAmt})
		          .getReg(0);

		  // %root = LOGIC %t3, %t4
		  MIB.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {ShiftedX, ShiftedY});

		  // The inner shift and the logic op were matched as one-use, so they are
		  // dead now. The old C0 constant is deliberately left for dead code
		  // elimination rather than erased by hand.
		  MatchInfo.Shift2->eraseFromParent();
		  MatchInfo.Logic->eraseFromParent();

		  MI.eraseFromParent();
		  return true;
		}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,		bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {		unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");		assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =		auto MaybeImmVal =
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);		getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal \|\| !isPowerOf2_64(MaybeImmVal->Value))		if (!MaybeImmVal \|\| !isPowerOf2_64(MaybeImmVal->Value))
return false;		return false;
ShiftVal = Log2_64(MaybeImmVal->Value);		ShiftVal = Log2_64(MaybeImmVal->Value);
▲ Show 20 Lines • Show All 1,283 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s \| FileCheck %s

				define amdgpu_cs i32 @test_shl_and_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_and_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 4
				; CHECK-NEXT: s_and_b32 s0, s0, -16
				; CHECK-NEXT: ; return to shader part epilog
				mbrkusanin (author, inline comment, Done): This can be simplified to a single instruction, s_lshl_b32 s0, s0, 4, but that will be a different combine. The same goes for a few more cases below.
				.entry:
				%z1 = shl i32 %arg1, 2
				%z2 = and i32 %z1, 1073741820
				%z3 = shl i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_and_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_and_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 8
				; CHECK-NEXT: s_and_b32 s0, s0, 0xffffff00
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 5
				%z2 = and i32 %z1, 536870880
				%z3 = shl i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_and_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_and_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 5
				; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff0
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 3
				%z2 = and i32 %z1, 536870908
				%z3 = shl i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_and_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_and_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 4
				; CHECK-NEXT: s_and_b32 s0, s0, 0xfffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 2
				%z2 = and i32 %z1, 1073741820
				%z3 = lshr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_and_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_and_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 8
				; CHECK-NEXT: s_and_b32 s0, s0, 0x3fffffc
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 5
				%z2 = and i32 %z1, 536870880
				%z3 = lshr i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_and_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_and_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 5
				; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 3
				%z2 = and i32 %z1, 536870908
				%z3 = lshr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_and_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_and_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 4
				; CHECK-NEXT: s_and_b32 s0, s0, 0xfffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 2
				%z2 = and i32 %z1, 1073741820
				%z3 = ashr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_and_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_and_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 8
				; CHECK-NEXT: s_and_b32 s0, s0, 0x3fffffc
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 5
				%z2 = and i32 %z1, 536870880
				%z3 = ashr i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_and_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_and_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 5
				; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 3
				%z2 = and i32 %z1, 536870908
				%z3 = ashr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_or_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_or_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 4
				; CHECK-NEXT: s_or_b32 s0, s0, 12
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 2
				%z2 = or i32 %z1, 3221225475
				%z3 = shl i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_or_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_or_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 8
				; CHECK-NEXT: s_or_b32 s0, s0, 0xfffffc00
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 3
				%z2 = or i32 %z1, 536870880
				%z3 = shl i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_or_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_or_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 5
				; CHECK-NEXT: s_or_b32 s0, s0, 0x7fffff80
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 2
				%z2 = or i32 %z1, 268435440
				%z3 = shl i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_or_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_or_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 4
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 2
				%z2 = or i32 %z1, 3
				%z3 = lshr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_or_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_or_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 8
				; CHECK-NEXT: s_or_b32 s0, s0, 0xffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 3
				%z2 = or i32 %z1, 536870880
				%z3 = lshr i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_or_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_or_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 5
				; CHECK-NEXT: s_or_b32 s0, s0, 0x1fffffe
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 2
				%z2 = or i32 %z1, 268435440
				%z3 = lshr i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_or_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_or_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 4
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 2
				%z2 = or i32 %z1, 3
				%z3 = ashr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_or_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_or_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 8
				; CHECK-NEXT: s_or_b32 s0, s0, 0xffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 3
				%z2 = or i32 %z1, 536870880
				%z3 = ashr i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_or_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_or_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 5
				; CHECK-NEXT: s_or_b32 s0, s0, 0x1fffffe
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 2
				%z2 = or i32 %z1, 268435440
				%z3 = ashr i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_xor_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_xor_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 4
				; CHECK-NEXT: s_xor_b32 s0, s0, -16
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 2
				%z2 = xor i32 %z1, 1073741820
				%z3 = shl i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_xor_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_xor_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 6
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 1
				%z2 = xor i32 %z1, 4160749568
				%z3 = shl i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_shl_xor_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_shl_xor_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshl_b32 s0, s0, 5
				; CHECK-NEXT: s_xor_b32 s0, s0, 56
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = shl i32 %arg1, 2
				%z2 = xor i32 %z1, 3221225479
				%z3 = shl i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_xor_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_xor_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 4
				; CHECK-NEXT: s_xor_b32 s0, s0, 0xfffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 2
				%z2 = xor i32 %z1, 1073741820
				%z3 = lshr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_xor_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_xor_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 6
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 1
				%z2 = xor i32 %z1, 31
				%z3 = lshr i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_lshr_xor_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_lshr_xor_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_lshr_b32 s0, s0, 5
				; CHECK-NEXT: s_xor_b32 s0, s0, 0x18000000
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = lshr i32 %arg1, 2
				%z2 = xor i32 %z1, 3221225479
				%z3 = lshr i32 %z2, 3
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_xor_1(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_xor_1:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 4
				; CHECK-NEXT: s_xor_b32 s0, s0, 0xfffffff
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 2
				%z2 = xor i32 %z1, 1073741820
				%z3 = ashr i32 %z2, 2
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_xor_2(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_xor_2:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 6
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 1
				%z2 = xor i32 %z1, 31
				%z3 = ashr i32 %z2, 5
				ret i32 %z3
				}

				define amdgpu_cs i32 @test_ashr_xor_3(i32 inreg %arg1) {
				; CHECK-LABEL: test_ashr_xor_3:
				; CHECK: ; %bb.0: ; %.entry
				; CHECK-NEXT: s_ashr_i32 s0, s0, 5
				; CHECK-NEXT: s_xor_b32 s0, s0, 0xf8000000
				; CHECK-NEXT: ; return to shader part epilog
				.entry:
				%z1 = ashr i32 %arg1, 2
				%z2 = xor i32 %z1, 3221225479
				%z3 = ashr i32 %z2, 3
				ret i32 %z3
				}

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][GlobalISel] Combine shift + logic + shift with constant operands
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300977

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

llvm/include/llvm/Target/GlobalISel/Combine.td

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][GlobalISel] Combine shift + logic + shift with constant operands
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300977

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

llvm/include/llvm/Target/GlobalISel/Combine.td

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll

[AMDGPU][GlobalISel] Combine shift + logic + shift with constant operands
ClosedPublic