Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -228,6 +228,10 @@ bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); + /// Fold (shift (shift base, x), y) -> (shift base (x+y)) + bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + /// Transform a multiply by a power-of-2 value to a left shift. bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); Index: llvm/include/llvm/Target/GlobalISel/Combine.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/Combine.td +++ llvm/include/llvm/Target/GlobalISel/Combine.td @@ -158,6 +158,14 @@ [{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]), (apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>; +// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same +def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">; +def shift_immed_chain : GICombineRule< + (defs root:$d, shift_immed_matchdata:$matchinfo), + (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_SSHLSAT, G_USHLSAT):$d, + [{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]), + (apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>; + def mul_to_shl_matchdata : GIDefMatchData<"unsigned">; def mul_to_shl : GICombineRule< (defs root:$d, mul_to_shl_matchdata:$matchinfo), @@ -553,4 +561,5 @@ not_cmp_fold, opt_brcond_by_inverting_cond, unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc, unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, - const_combines, xor_of_and_with_same_reg, 
ptr_add_with_zero]>;
+    const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
+    shift_immed_chain]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1539,6 +1539,91 @@
   return true;
 }
 
+/// Match a chain of two shifts with the same opcode whose shift amounts are
+/// both constants:
+///   %t1 = SHIFT %base, G_CONSTANT imm1
+///   %root = SHIFT %t1, G_CONSTANT imm2
+/// -->
+///   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
+/// where SHIFT is one of G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT. On success
+/// \p MatchInfo receives %base and the sum of the two shift amounts.
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+                                          RegisterImmPair &MatchInfo) {
+  unsigned Opcode = MI.getOpcode();
+  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+          Opcode == TargetOpcode::G_USHLSAT) &&
+         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+  // The root shift must shift by a constant amount.
+  Register Shl2 = MI.getOperand(1).getReg();
+  Register Imm1 = MI.getOperand(2).getReg();
+  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+  if (!MaybeImmVal)
+    return false;
+
+  // The value being shifted must be produced by a shift with the same opcode.
+  // getUniqueVRegDef() returns null when the register does not have exactly
+  // one definition, so test the pointer before dereferencing it.
+  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+  if (!Shl2Def || Shl2Def->getOpcode() != Opcode)
+    return false;
+
+  // The inner shift must shift by a constant amount as well.
+  Register Base = Shl2Def->getOperand(1).getReg();
+  Register Imm2 = Shl2Def->getOperand(2).getReg();
+  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+  if (!MaybeImm2Val)
+    return false;
+
+  // Pass the combined immediate to the apply function.
+  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+  MatchInfo.Reg = Base;
+  return true;
+}
+
+/// Rewrite the root shift matched by matchShiftImmedChain() so it shifts
+/// MatchInfo.Reg by MatchInfo.Imm. If that amount reaches the scalar size,
+/// G_SHL/G_LSHR are replaced by the constant 0 and the other opcodes have the
+/// amount clamped to the scalar size minus one.
+bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+                                          RegisterImmPair &MatchInfo) {
+  unsigned Opcode = MI.getOpcode();
+  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+          Opcode == TargetOpcode::G_USHLSAT) &&
+         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+  Builder.setInstrAndDebugLoc(MI);
+  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+  auto Imm = MatchInfo.Imm;
+
+  if (Imm >= ScalarSizeInBits) {
+    // Any logical shift that exceeds scalar size will produce zero.
+    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+      Builder.buildConstant(MI.getOperand(0), 0);
+      MI.eraseFromParent();
+      return true;
+    }
+    // Arithmetic shift and saturating signed left shift have no effect beyond
+    // scalar size.
+    // NOTE(review): G_USHLSAT is clamped here too. Confirm that shifting by
+    // ScalarSizeInBits - 1 saturates like the original chain (consider
+    // %base == 1); if not, the match should reject that case.
+    Imm = ScalarSizeInBits - 1;
+  }
+
+  // Build the combined shift amount and rewrite the root shift in place.
+  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+  Observer.changingInstr(MI);
+  MI.getOperand(1).setReg(MatchInfo.Reg);
+  MI.getOperand(2).setReg(NewImm);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                           unsigned &ShiftVal) {
   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -40,8 +40,7 @@
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    v_mov_b32_e32 v1, 7
-; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_ashr_i8_7:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
@@ -0,0 +1,354 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_ashr_i44
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_ashr_i44
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 43
+    ; CHECK: 
[[ASHR:%[0-9]+]]:_(s44) = G_ASHR [[TRUNC]], [[C]](s44) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR]](s44) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s44) = G_TRUNC %3(s64) + %5:_(s44) = G_CONSTANT i44 22 + %6:_(s44) = G_ASHR %4, %5(s44) + %7:_(s44) = G_ASHR %6, %5(s44) + %8:_(s64) = G_ANYEXT %7(s44) + %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) + $vgpr0 = COPY %9(s32) + $vgpr1 = COPY %10(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_ashr_i55 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_ashr_i55 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 + ; CHECK: [[ASHR:%[0-9]+]]:_(s55) = G_ASHR [[TRUNC]], [[C]](s55) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASHR]](s55) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $sgpr0 = COPY [[UV]](s32) + ; CHECK: $sgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s55) = G_TRUNC %3(s64) + %5:_(s55) = G_CONSTANT i55 50 + %7:_(s55) = G_CONSTANT i55 3 + %6:_(s55) = G_ASHR %4, %5(s55) + %8:_(s55) = G_ASHR %6, %7(s55) + %9:_(s64) = G_ANYEXT %8(s55) + %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) + $sgpr0 = COPY %10(s32) + $sgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG 
implicit $sgpr0, implicit $sgpr1 + +... +--- +name: test_lshr_i44 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_lshr_i44 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $sgpr0 = COPY [[UV]](s32) + ; CHECK: $sgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s44) = G_TRUNC %3(s64) + %5:_(s44) = G_CONSTANT i44 22 + %6:_(s44) = G_LSHR %4, %5(s44) + %7:_(s44) = G_LSHR %6, %5(s44) + %8:_(s64) = G_ANYEXT %7(s44) + %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) + $sgpr0 = COPY %9(s32) + $sgpr1 = COPY %10(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... +--- +name: test_lshr_i55 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_lshr_i55 + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 + ; CHECK: [[LSHR:%[0-9]+]]:_(s55) = G_LSHR [[TRUNC]], [[C]](s55) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s55) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s55) = G_TRUNC %3(s64) + %5:_(s55) = G_CONSTANT i55 50 + %7:_(s55) = G_CONSTANT i55 3 + %6:_(s55) = G_LSHR %4, 
%5(s55) + %8:_(s55) = G_LSHR %6, %7(s55) + %9:_(s64) = G_ANYEXT %8(s55) + %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) + $vgpr0 = COPY %10(s32) + $vgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_shl_i44 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_shl_i44 + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s44) = G_TRUNC %3(s64) + %5:_(s44) = G_CONSTANT i44 22 + %6:_(s44) = G_SHL %4, %5(s44) + %7:_(s44) = G_SHL %6, %5(s44) + %8:_(s64) = G_ANYEXT %7(s44) + %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) + $vgpr0 = COPY %9(s32) + $vgpr1 = COPY %10(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + +... 
+--- +name: test_shl_i55 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_shl_i55 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 + ; CHECK: [[SHL:%[0-9]+]]:_(s55) = G_SHL [[TRUNC]], [[C]](s55) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SHL]](s55) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $sgpr0 = COPY [[UV]](s32) + ; CHECK: $sgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s55) = G_TRUNC %3(s64) + %5:_(s55) = G_CONSTANT i55 50 + %7:_(s55) = G_CONSTANT i55 3 + %6:_(s55) = G_SHL %4, %5(s55) + %8:_(s55) = G_SHL %6, %7(s55) + %9:_(s64) = G_ANYEXT %8(s55) + %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) + $sgpr0 = COPY %10(s32) + $sgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... 
+--- +name: sshlsat_i44 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: sshlsat_i44 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 43 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK: $sgpr0 = COPY [[INT]](s32) + ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK: $sgpr1 = COPY [[INT1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %0:_(s44) = G_TRUNC %3(s64) + %5:_(s44) = G_CONSTANT i44 22 + %6:_(s44) = G_SSHLSAT %0, %5(s44) + %7:_(s44) = G_SSHLSAT %6, %5(s44) + %8:_(s64) = G_ANYEXT %7(s44) + %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64) + %11:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) + $sgpr0 = COPY %11(s32) + %12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32) + $sgpr1 = COPY %12(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... 
+--- +name: sshlsat_i55 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: sshlsat_i55 + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK: $vgpr1 = COPY [[INT1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %0:_(s55) = G_TRUNC %3(s64) + %5:_(s55) = G_CONSTANT i55 50 + %7:_(s55) = G_CONSTANT i55 3 + %6:_(s55) = G_SSHLSAT %0, %5(s55) + %8:_(s55) = G_SSHLSAT %6, %7(s55) + %9:_(s64) = G_ANYEXT %8(s55) + %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) + %12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32) + $vgpr0 = COPY %12(s32) + %13:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %11(s32) + $vgpr1 = COPY %13(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + +... 
+--- +name: ushlsat_i44 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: ushlsat_i44 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 43 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s44) = G_USHLSAT [[TRUNC]], [[C]](s44) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[USHLSAT]](s44) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %0:_(s44) = G_TRUNC %4(s64) + %5:_(s44) = G_CONSTANT i44 22 + %6:_(s44) = G_USHLSAT %0, %5(s44) + %7:_(s44) = G_USHLSAT %6, %5(s44) + %9:_(s64) = G_ANYEXT %7(s44) + %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) + $vgpr0 = COPY %10(s32) + $vgpr1 = COPY %11(s32) + %8:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %8, implicit $vgpr0, implicit $vgpr1 + +... 
+--- +name: ushlsat_i55 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: ushlsat_i55 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s55) = G_USHLSAT [[TRUNC]], [[C]](s55) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[USHLSAT]](s55) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %0:_(s55) = G_TRUNC %4(s64) + %5:_(s55) = G_CONSTANT i55 50 + %7:_(s55) = G_CONSTANT i55 3 + %6:_(s55) = G_USHLSAT %0, %5(s55) + %8:_(s55) = G_USHLSAT %6, %7(s55) + %10:_(s64) = G_ANYEXT %8(s55) + %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) + $vgpr0 = COPY %11(s32) + $vgpr1 = COPY %12(s32) + %9:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %9, implicit $vgpr0, implicit $vgpr1 + +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-shlsat.mir @@ -0,0 +1,222 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: sshlsat_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: sshlsat_1 + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) + ; CHECK: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %4:_(s32) = G_SSHLSAT %3, %2(s32) + $vgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 + +... +--- +name: sshlsat_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: sshlsat_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) + ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32) + ; CHECK: $sgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 1 + %4:_(s32) = G_CONSTANT i32 2 + %6:_(s32) = G_CONSTANT i32 3 + %8:_(s32) = G_CONSTANT i32 4 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %5:_(s32) = G_SSHLSAT %3, %4(s32) + %7:_(s32) = G_SSHLSAT %5, %6(s32) + %9:_(s32) = G_SSHLSAT %7, %8(s32) + %10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) + $sgpr0 = COPY %10(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: sshlsat_i32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: sshlsat_i32 + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32) + ; CHECK: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s32) = G_SSHLSAT %0, %2(s32) + %4:_(s32) = G_SSHLSAT %3, %2(s32) + %5:_(s32) = G_SSHLSAT %4, %2(s32) + %6:_(s32) = G_SSHLSAT %5, %2(s32) + $vgpr0 = COPY %6(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 + +... +--- +name: sshlsat_i64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: sshlsat_i64 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 62 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[C]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) + ; CHECK: $sgpr0 = COPY [[UV]](s32) + ; CHECK: $sgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s64) = G_CONSTANT i64 10 + %11:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_SSHLSAT %0, %4(s64) + %6:_(s64) = G_SSHLSAT %5, %4(s64) + %7:_(s64) = G_SSHLSAT %6, %4(s64) + %8:_(s64) = G_SSHLSAT %7, %4(s64) + %9:_(s64) = G_SSHLSAT %8, %4(s64) + %10:_(s64) = G_SSHLSAT %9, %4(s64) + %12:_(s64) = G_SSHLSAT %10, %11(s64) + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %12(s64) + $sgpr0 = COPY %13(s32) + $sgpr1 = COPY %14(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... 
+--- +name: ushlsat_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ushlsat_1 + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C]](s32) + ; CHECK: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %4:_(s32) = G_USHLSAT %3, %2(s32) + $vgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 + +... +--- +name: ushlsat_2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ushlsat_2 + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C]](s32) + ; CHECK: $sgpr0 = COPY [[USHLSAT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = G_CONSTANT i32 1 + %4:_(s32) = G_CONSTANT i32 2 + %6:_(s32) = G_CONSTANT i32 3 + %8:_(s32) = G_CONSTANT i32 4 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %5:_(s32) = G_USHLSAT %3, %4(s32) + %7:_(s32) = G_USHLSAT %5, %6(s32) + %9:_(s32) = G_USHLSAT %7, %8(s32) + $sgpr0 = COPY %9(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: ushlsat_i32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ushlsat_i32 + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[C]](s32) + ; CHECK: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s32) = G_USHLSAT %0, %2(s32) + %4:_(s32) = G_USHLSAT %3, %2(s32) + %5:_(s32) = G_USHLSAT %4, %2(s32) + %6:_(s32) = G_USHLSAT %5, %2(s32) + $vgpr0 = COPY %6(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 + +... +--- +name: ushlsat_i64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ushlsat_i64 + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[C]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) + ; CHECK: $sgpr0 = COPY [[UV]](s32) + ; CHECK: $sgpr1 = COPY [[UV1]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(s64) = G_CONSTANT i64 10 + %5:_(s64) = G_USHLSAT %0, %4(s64) + %6:_(s64) = G_USHLSAT %5, %4(s64) + %7:_(s64) = G_USHLSAT %6, %4(s64) + %8:_(s64) = G_USHLSAT %7, %4(s64) + %9:_(s64) = G_USHLSAT %8, %4(s64) + %10:_(s64) = G_USHLSAT %9, %4(s64) + %11:_(s64) = G_USHLSAT %10, %4(s64) + %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %11(s64) + $sgpr0 = COPY %12(s32) + $sgpr1 = COPY %13(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +define amdgpu_cs i32 @test_shl_1(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 5 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 2 + %z2 = shl i32 %z1, 3 + ret i32 %z2 +} + +define amdgpu_cs i32 @test_shl_2(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshl_b32 s0, s0, 10 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 1 + %z2 = shl i32 %z1, 2 + %z3 = shl i32 %z2, 3 + %z4 = shl i32 %z3, 4 + ret i32 %z4 +} + +define amdgpu_cs i32 @test_shl_i32(i32 inreg %arg1) { +; CHECK-LABEL: test_shl_i32: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i32 %arg1, 10 + %z2 = shl i32 %z1, 10 + %z3 = shl i32 %z2, 10 + %z4 = shl i32 %z3, 10 + ret i32 %z4 +} + +define amdgpu_cs i64 @test_shl_i64(i64 inreg %arg1) { +; CHECK-LABEL: test_shl_i64: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = shl i64 %arg1, 10 + %z2 = shl i64 %z1, 10 + %z3 = shl i64 %z2, 10 + %z4 = shl i64 %z3, 10 + %z5 = shl i64 %z4, 10 + %z6 = shl i64 %z5, 10 + %z7 = shl i64 %z6, 10 + ret i64 %z7 +} + +define amdgpu_cs i32 @test_ashr_1(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 5 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 2 + %z2 = ashr i32 %z1, 3 + ret i32 %z2 +} + +define 
amdgpu_cs i32 @test_ashr_2(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 10 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 1 + %z2 = ashr i32 %z1, 2 + %z3 = ashr i32 %z2, 3 + %z4 = ashr i32 %z3, 4 + ret i32 %z4 +} + +define amdgpu_cs i32 @test_ashr_i32(i32 inreg %arg1) { +; CHECK-LABEL: test_ashr_i32: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s0, 31 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i32 %arg1, 10 + %z2 = ashr i32 %z1, 10 + %z3 = ashr i32 %z2, 10 + %z4 = ashr i32 %z3, 10 + ret i32 %z4 +} + +define amdgpu_cs i64 @test_ashr_i64(i64 inreg %arg1) { +; CHECK-LABEL: test_ashr_i64: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_ashr_i32 s0, s1, 31 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = ashr i64 %arg1, 10 + %z2 = ashr i64 %z1, 10 + %z3 = ashr i64 %z2, 10 + %z4 = ashr i64 %z3, 10 + %z5 = ashr i64 %z4, 10 + %z6 = ashr i64 %z5, 10 + %z7 = ashr i64 %z6, 10 + ret i64 %z7 +} + +define amdgpu_cs i32 @test_lshr_1(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 5 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 2 + %z2 = lshr i32 %z1, 3 + ret i32 %z2 +} + +define amdgpu_cs i32 @test_lshr_2(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_lshr_b32 s0, s0, 10 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 1 + %z2 = lshr i32 %z1, 2 + %z3 = lshr i32 %z2, 3 + %z4 = lshr i32 %z3, 4 + ret i32 %z4 +} + +define amdgpu_cs i32 @test_lshr_i32(i32 inreg %arg1) { +; CHECK-LABEL: test_lshr_i32: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i32 %arg1, 10 + %z2 = lshr i32 %z1, 10 + %z3 = lshr i32 %z2, 10 + %z4 = lshr i32 %z3, 10 + ret i32 %z4 +} + 
+define amdgpu_cs i64 @test_lshr_i64(i64 inreg %arg1) { +; CHECK-LABEL: test_lshr_i64: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ; return to shader part epilog +.entry: + %z1 = lshr i64 %arg1, 10 + %z2 = lshr i64 %z1, 10 + %z3 = lshr i64 %z2, 10 + %z4 = lshr i64 %z3, 10 + %z5 = lshr i64 %z4, 10 + %z6 = lshr i64 %z5, 10 + %z7 = lshr i64 %z6, 10 + ret i64 %z7 +}