Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -59,6 +59,13 @@
   int64_t Imm;
 };
 
+struct ShiftOfShiftedLogic {
+  MachineInstr *Logic;       // The G_AND/G_OR/G_XOR between the two shifts.
+  MachineInstr *Shift2;      // The inner shift feeding the logic op.
+  Register LogicNonShiftReg; // The logic operand that is not a shift.
+  uint64_t ValSum;           // Sum of the two constant shift amounts.
+};
+
 using OperandBuildSteps =
     SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
 struct InstructionBuildSteps {
@@ -232,6 +239,14 @@
   bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
   bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo);
 
+  /// If we have a shift-by-constant of a bitwise logic op that itself has a
+  /// shift-by-constant operand with the same shift opcode, we may be able to
+  /// convert that into 2 independent shifts followed by the logic op.
+  bool matchShiftOfShiftedLogic(MachineInstr &MI,
+                                ShiftOfShiftedLogic &MatchInfo);
+  bool applyShiftOfShiftedLogic(MachineInstr &MI,
+                                ShiftOfShiftedLogic &MatchInfo);
+
   /// Transform a multiply by a power-of-2 value to a left shift.
   bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
   bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -166,6 +166,15 @@
          [{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>;
 
+// Transform shift (logic (shift X, C0), Y), C1
+// -> logic (shift X, (C0+C1)), (shift Y, C1), if the shift kinds match.
+def shift_of_shifted_logic_matchdata : GIDefMatchData<"ShiftOfShiftedLogic">;
+def shift_of_shifted_logic_chain : GICombineRule<
+  (defs root:$d, shift_of_shifted_logic_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_USHLSAT, G_SSHLSAT):$d,
+         [{ return Helper.matchShiftOfShiftedLogic(*${d}, ${matchinfo}); }]),
+  (apply [{ Helper.applyShiftOfShiftedLogic(*${d}, ${matchinfo}); }])>;
+
 def mul_to_shl_matchdata : GIDefMatchData<"unsigned">;
 def mul_to_shl : GICombineRule<
   (defs root:$d, mul_to_shl_matchdata:$matchinfo),
@@ -562,4 +571,4 @@
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines,
     xor_of_and_with_same_reg, ptr_add_with_zero,
-    shift_immed_chain]>;
+    shift_immed_chain, shift_of_shifted_logic_chain]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1617,6 +1617,122 @@
   return true;
 }
 
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+                                              ShiftOfShiftedLogic &MatchInfo) {
+  // We're trying to match the following pattern with any of
+  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+  // with any of G_AND/G_OR/G_XOR logic instructions.
+  // %t1 = SHIFT %X, G_CONSTANT C0
+  // %t2 = LOGIC %t1, %Y
+  // %root = SHIFT %t2, G_CONSTANT C1
+  // -->
+  // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+  // %t4 = SHIFT %Y, G_CONSTANT C1
+  // %root = LOGIC %t3, %t4
+  unsigned ShiftOpcode = MI.getOpcode();
+  assert((ShiftOpcode == TargetOpcode::G_SHL ||
+          ShiftOpcode == TargetOpcode::G_ASHR ||
+          ShiftOpcode == TargetOpcode::G_LSHR ||
+          ShiftOpcode == TargetOpcode::G_USHLSAT ||
+          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
+
+  // Match a one-use bitwise logic op.
+  Register LogicDest = MI.getOperand(1).getReg();
+  if (!MRI.hasOneNonDBGUse(LogicDest))
+    return false;
+
+  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+  unsigned LogicOpcode = LogicMI->getOpcode();
+  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+      LogicOpcode != TargetOpcode::G_XOR)
+    return false;
+
+  // The outer shift amount must be a constant.
+  const Register C1 = MI.getOperand(2).getReg();
+  auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+  if (!MaybeImmVal)
+    return false;
+
+  const uint64_t C1Val = MaybeImmVal->Value;
+
+  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+    // The inner shift must use the same opcode as the outer one and have
+    // exactly one use.
+    if (MI->getOpcode() != ShiftOpcode ||
+        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+      return false;
+
+    // Its shift amount must also be a constant.
+    auto MaybeImmVal =
+        getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+    if (!MaybeImmVal)
+      return false;
+
+    ShiftVal = MaybeImmVal->Value;
+    return true;
+  };
+
+  // Logic ops are commutative, so check each operand for a match.
+  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+  uint64_t C0Val;
+
+  if (matchFirstShift(LogicMIOp1, C0Val)) {
+    MatchInfo.LogicNonShiftReg = LogicMIReg2;
+    MatchInfo.Shift2 = LogicMIOp1;
+  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+    MatchInfo.LogicNonShiftReg = LogicMIReg1;
+    MatchInfo.Shift2 = LogicMIOp2;
+  } else
+    return false;
+
+  MatchInfo.ValSum = C0Val + C1Val;
+
+  // The fold is not valid if the sum of the shift amounts exceeds bitwidth.
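+  // For example, with s32 operands, an inner shift by 20 and an outer shift
+  // by 20 would need a single shift by 40, which the type cannot represent.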
+  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+    return false;
+
+  MatchInfo.Logic = LogicMI;
+  return true;
+}
+
+bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+                                              ShiftOfShiftedLogic &MatchInfo) {
+  unsigned Opcode = MI.getOpcode();
+  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+          Opcode == TargetOpcode::G_SSHLSAT) &&
+         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
+
+  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+  Builder.setInstrAndDebugLoc(MI);
+
+  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+  Register Shift1 =
+      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+  Register Shift2Const = MI.getOperand(2).getReg();
+  Register Shift2 = Builder
+                        .buildInstr(Opcode, {DestType},
+                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
+                        .getReg(0);
+
+  Register Dest = MI.getOperand(0).getReg();
+  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+  // The inner shift and the logic op were one-use, so it's safe to remove
+  // them now that they have been replaced.
+  MatchInfo.Shift2->eraseFromParent();
+  MatchInfo.Logic->eraseFromParent();
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                           unsigned &ShiftVal) {
   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic-shlsat.mir
@@ -0,0 +1,405 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ushlsat_and_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: ushlsat_and_1
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1073741820
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32)
+    ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C1]], [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]]
+    ; CHECK: $sgpr0 = COPY [[AND]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_CONSTANT i32 2
+    %4:_(s32) = G_CONSTANT i32 1073741820
+    %3:_(s32) = G_USHLSAT %0, %2(s32)
+    %5:_(s32) = G_AND %3, %4
+    %6:_(s32) = G_USHLSAT %5, %2(s32)
+    $sgpr0 = COPY %6(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+
+...
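+# The tests below vary the two shift amounts: the rewritten sequence should
+# always shift the original source by C0 + C1 (e.g. 5 + 3 = 8 in
+# ushlsat_and_2) and shift the logic constant independently by C1.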
+--- +name: ushlsat_and_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_and_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 536870880 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[AND]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 5 + %4:_(s32) = G_CONSTANT i32 536870880 + %6:_(s32) = G_CONSTANT i32 3 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_AND %3, %4 + %7:_(s32) = G_USHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: ushlsat_and_3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_and_3 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[AND]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 65536 + %6:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_AND %3, %4 + %7:_(s32) = G_USHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: ushlsat_or_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_or_1 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1073741821 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 2 + %4:_(s32) = G_CONSTANT i32 -1073741821 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %6:_(s32) = G_USHLSAT %5, %2(s32) + $sgpr0 = COPY %6(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: ushlsat_or_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_or_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -536870881 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 5 + %4:_(s32) = G_CONSTANT i32 -536870881 + %6:_(s32) = G_CONSTANT i32 3 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %7:_(s32) = G_USHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: ushlsat_or_3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_or_3 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 65536 + %6:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %7:_(s32) = G_USHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: ushlsat_xor +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_xor + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 43690 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[USHLSAT1:%[0-9]+]]:_(s32) = G_USHLSAT [[C]], [[C1]](s32) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[USHLSAT]], [[USHLSAT1]] + ; CHECK: $sgpr0 = COPY [[XOR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 43690 + %6:_(s32) = G_CONSTANT i32 5 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_XOR %3, %4 + %7:_(s32) = G_USHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
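+# The G_SSHLSAT cases below mirror the G_USHLSAT cases above. The combine only
+# requires both shifts to share an opcode, so the signed saturating variant is
+# rewritten the same way.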
+--- +name: sshlsat_and_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_and_1 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1073741820 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C1]], [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[AND]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 2 + %4:_(s32) = G_CONSTANT i32 1073741820 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_AND %3, %4 + %6:_(s32) = G_SSHLSAT %5, %2(s32) + $sgpr0 = COPY %6(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: sshlsat_and_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_and_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 536870880 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[AND]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 5 + %4:_(s32) = G_CONSTANT i32 536870880 + %6:_(s32) = G_CONSTANT i32 3 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_AND %3, %4 + %7:_(s32) = G_SSHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: sshlsat_and_3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_and_3 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[AND]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 65536 + %6:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_AND %3, %4 + %7:_(s32) = G_SSHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: sshlsat_or_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_or_1 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1073741821 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 2 + %4:_(s32) = G_CONSTANT i32 -1073741821 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %6:_(s32) = G_SSHLSAT %5, %2(s32) + $sgpr0 = COPY %6(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: sshlsat_or_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_or_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -536870881 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 5 + %4:_(s32) = G_CONSTANT i32 -536870881 + %6:_(s32) = G_CONSTANT i32 3 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %7:_(s32) = G_SSHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: sshlsat_or_3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_or_3 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[OR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 65536 + %6:_(s32) = G_CONSTANT i32 16 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_OR %3, %4 + %7:_(s32) = G_SSHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: sshlsat_xor +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_xor + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 43690 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C2]](s32) + ; CHECK: [[SSHLSAT1:%[0-9]+]]:_(s32) = G_SSHLSAT [[C]], [[C1]](s32) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SSHLSAT]], [[SSHLSAT1]] + ; CHECK: $sgpr0 = COPY [[XOR]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 43690 + %6:_(s32) = G_CONSTANT i32 5 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_XOR %3, %4 + %7:_(s32) = G_SSHLSAT %5, %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll @@ -0,0 +1,348 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +define amdgpu_cs i32 @test_shl_and_1(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_and_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 4 +; CHECK-NEXT: s_and_b32 s0, s0, -16 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = and i32 %z1, 1073741820 + %z3 = shl i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_and_2(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_and_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 8 +; CHECK-NEXT: s_and_b32 s0, s0, 0xffffff00 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 5 + %z2 = and i32 %z1, 536870880 + %z3 = shl i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_and_3(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_and_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 5 +; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 3 + %z2 = and i32 %z1, 536870908 + %z3 = shl i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_and_1(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_and_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 4 +; CHECK-NEXT: s_and_b32 s0, s0, 0xfffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = and i32 %z1, 1073741820 + %z3 = lshr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_and_2(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_and_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 8 +; CHECK-NEXT: s_and_b32 s0, s0, 0x3fffffc +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 5 + %z2 = and i32 %z1, 536870880 + %z3 = lshr i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_and_3(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_and_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 5 +; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 3 + %z2 = and i32 %z1, 536870908 + %z3 = lshr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_and_1(i32 inreg %arg1) { +; CHECK-LABEL: 
test_ashr_and_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 4 +; CHECK-NEXT: s_and_b32 s0, s0, 0xfffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = and i32 %z1, 1073741820 + %z3 = ashr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_and_2(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_and_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 8 +; CHECK-NEXT: s_and_b32 s0, s0, 0x3fffffc +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 5 + %z2 = and i32 %z1, 536870880 + %z3 = ashr i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_and_3(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_and_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 5 +; CHECK-NEXT: s_and_b32 s0, s0, 0x7ffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 3 + %z2 = and i32 %z1, 536870908 + %z3 = ashr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_or_1(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_or_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 4 +; CHECK-NEXT: s_or_b32 s0, s0, 12 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = or i32 %z1, 3221225475 + %z3 = shl i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_or_2(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_or_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 8 +; CHECK-NEXT: s_or_b32 s0, s0, 0xfffffc00 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 3 + %z2 = or i32 %z1, 536870880 + %z3 = shl i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_or_3(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_or_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 5 +; CHECK-NEXT: s_or_b32 s0, s0, 0x7fffff80 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = or i32 %z1, 268435440 + %z3 = shl i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_or_1(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_or_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 4 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = or i32 %z1, 3 + %z3 = lshr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_or_2(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_or_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 8 +; CHECK-NEXT: s_or_b32 s0, s0, 0xffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 3 + %z2 = or i32 %z1, 536870880 + %z3 = lshr i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_or_3(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_or_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 5 +; CHECK-NEXT: s_or_b32 s0, s0, 0x1fffffe +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = or i32 %z1, 268435440 + %z3 = lshr i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_or_1(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_or_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 4 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = or i32 %z1, 3 + %z3 = ashr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_or_2(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_or_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 8 +; CHECK-NEXT: s_or_b32 s0, s0, 0xffffff +; CHECK-NEXT: ; return to shader 
part epilog +.entry: + %z1 = ashr i32 %arg1, 3 + %z2 = or i32 %z1, 536870880 + %z3 = ashr i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_or_3(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_or_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 5 +; CHECK-NEXT: s_or_b32 s0, s0, 0x1fffffe +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = or i32 %z1, 268435440 + %z3 = ashr i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_xor_1(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_xor_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 4 +; CHECK-NEXT: s_xor_b32 s0, s0, -16 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = xor i32 %z1, 1073741820 + %z3 = shl i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_xor_2(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_xor_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 6 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 1 + %z2 = xor i32 %z1, 4160749568 + %z3 = shl i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_shl_xor_3(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_xor_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 5 +; CHECK-NEXT: s_xor_b32 s0, s0, 56 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = xor i32 %z1, 3221225479 + %z3 = shl i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_xor_1(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_xor_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 4 +; CHECK-NEXT: s_xor_b32 s0, s0, 0xfffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = xor i32 %z1, 1073741820 + %z3 = lshr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_xor_2(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_xor_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 6 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 1 + %z2 = xor i32 %z1, 31 + %z3 = lshr i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_lshr_xor_3(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_xor_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 5 +; CHECK-NEXT: s_xor_b32 s0, s0, 0x18000000 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = xor i32 %z1, 3221225479 + %z3 = lshr i32 %z2, 3 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_xor_1(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_xor_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 4 +; CHECK-NEXT: s_xor_b32 s0, s0, 0xfffffff +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = xor i32 %z1, 1073741820 + %z3 = ashr i32 %z2, 2 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_xor_2(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_xor_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 6 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 1 + %z2 = xor i32 %z1, 31 + %z3 = ashr i32 %z2, 5 + ret i32 %z3 +} + +define amdgpu_cs i32 @test_ashr_xor_3(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_xor_3: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 5 +; CHECK-NEXT: s_xor_b32 s0, s0, 0xf8000000 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = xor i32 %z1, 3221225479 + %z3 = ashr i32 %z2, 3 + ret i32 %z3 +}