diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -245,6 +245,15 @@ %2:_(s32) = G_ADD %0:_(s32), %1:_(s32) +G_SDIVREM, G_UDIVREM +^^^^^^^^^^^^^^^^^^^^ + +Perform integer division and remainder thereby producing two results. + +.. code-block:: none + + %div:_(s32), %rem:_(s32) = G_SDIVREM %0:_(s32), %1:_(s32) + G_SADDSAT, G_UADDSAT, G_SSUBSAT, G_USUBSAT, G_SSHLSAT, G_USHLSAT ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -156,6 +156,11 @@ bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); bool applySextInRegOfLoad(MachineInstr &MI, std::tuple &MatchInfo); + /// Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM + /// when their source operands are identical. + bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI); + void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI); + /// If a brcond's true block is not the fallthrough, make it so by inverting /// the condition and swapping operands. 
bool matchOptBrCondByInvertingCond(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -373,7 +373,7 @@ LegalizeResult lowerReadWriteRegister(MachineInstr &MI); LegalizeResult lowerSMULH_UMULH(MachineInstr &MI); LegalizeResult lowerSelect(MachineInstr &MI); - + LegalizeResult lowerDIVREM(MachineInstr &MI); }; /// Helper function that creates a libcall to the given \p Name using the given diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -244,6 +244,12 @@ // Generic unsigned remainder instruction. HANDLE_TARGET_OPCODE(G_UREM) +// Generic signed divrem instruction. +HANDLE_TARGET_OPCODE(G_SDIVREM) + +// Generic unsigned divrem instruction. +HANDLE_TARGET_OPCODE(G_UDIVREM) + /// Generic bitwise and instruction. HANDLE_TARGET_OPCODE(G_AND) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -285,6 +285,22 @@ let isCommutable = false; } +// Generic signed division and remainder. +def G_SDIVREM : GenericInstruction { + let OutOperandList = (outs type0:$div, type0:$rem); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; + let isCommutable = false; +} + +// Generic unsigned division and remainder. +def G_UDIVREM : GenericInstruction { + let OutOperandList = (outs type0:$div, type0:$rem); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; + let isCommutable = false; +} + // Generic bitwise and. 
def G_AND : GenericInstruction { let OutOperandList = (outs type0:$dst);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -304,6 +304,15 @@
   (apply [{ return Helper.applySimplifyURemByPow2(*${root}); }])
 >;
 
+// Transform d = [su]div(x, y) and r = [su]rem(x, y) - > d, r = [su]divrem(x, y)
+def div_rem_to_divrem_matchdata : GIDefMatchData<"MachineInstr *">;
+def div_rem_to_divrem : GICombineRule<
+  (defs root:$root, div_rem_to_divrem_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SDIV, G_UDIV, G_SREM, G_UREM):$root,
+    [{ return Helper.matchCombineDivRem(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyCombineDivRem(*${root}, ${matchinfo}); }])
+>;
+
 // Fold (x op 0) - > 0
 def binop_right_to_zero: GICombineRule<
   (defs root:$root),
@@ -605,4 +614,5 @@
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines,
     xor_of_and_with_same_reg, ptr_add_with_zero,
-    shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine]>;
+    shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
+    div_rem_to_divrem]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -943,6 +943,108 @@
   LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
 }
 
+// Root matcher for the div/rem -> divrem combine. \p MI is a G_[SU]DIV or
+// G_[SU]REM; on success \p OtherMI is set to the complementary instruction
+// (same signedness, same block, equal dividend and divisor).
+bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
+                                        MachineInstr *&OtherMI) {
+  unsigned Opcode = MI.getOpcode();
+  bool IsDiv, IsSigned;
+
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  case TargetOpcode::G_SDIV:
+  case TargetOpcode::G_UDIV: {
+    IsDiv = true;
+    IsSigned = Opcode == TargetOpcode::G_SDIV;
+    break;
+  }
+  case TargetOpcode::G_SREM:
+  case TargetOpcode::G_UREM: {
+    IsDiv = false;
+    IsSigned = Opcode == TargetOpcode::G_SREM;
+    break;
+  }
+  }
+
+  Register Src1 = MI.getOperand(1).getReg();
+  unsigned DivOpcode, RemOpcode, DivremOpcode;
+  if (IsSigned) {
+    DivOpcode = TargetOpcode::G_SDIV;
+    RemOpcode = TargetOpcode::G_SREM;
+    DivremOpcode = TargetOpcode::G_SDIVREM;
+  } else {
+    DivOpcode = TargetOpcode::G_UDIV;
+    RemOpcode = TargetOpcode::G_UREM;
+    DivremOpcode = TargetOpcode::G_UDIVREM;
+  }
+
+  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
+    return false;
+
+  // Combine:
+  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
+  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
+  // into:
+  //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+  // Combine:
+  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
+  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
+  // into:
+  //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+  // Walking the uses of Src1 also reaches instructions that use it as the
+  // divisor (operand 2), so the dividend (operand 1) has to be compared
+  // explicitly; checking only operand 2 can pair unrelated instructions.
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
+    if (MI.getParent() == UseMI.getParent() &&
+        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
+         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
+        matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1)) &&
+        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
+      OtherMI = &UseMI;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Replace the matched pair with a single G_[SU]DIVREM and erase both
+// originals. \p OtherMI is the partner selected by matchCombineDivRem.
+void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
+                                        MachineInstr *&OtherMI) {
+  unsigned Opcode = MI.getOpcode();
+  assert(OtherMI && "OtherMI shouldn't be empty.");
+
+  Register DestDivReg, DestRemReg;
+  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
+    DestDivReg = MI.getOperand(0).getReg();
+    DestRemReg = OtherMI->getOperand(0).getReg();
+  } else {
+    DestDivReg = OtherMI->getOperand(0).getReg();
+    DestRemReg = MI.getOperand(0).getReg();
+  }
+
+  bool IsSigned =
+      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
+
+  // Build the G_[SU]DIVREM at whichever instruction comes first in the block
+  // and take the sources from it; inserting at the later one would place the
+  // new defs after existing users of the earlier result.
+  MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
+  Builder.setInstrAndDebugLoc(*FirstInst);
+  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
+                              : TargetOpcode::G_UDIVREM,
+                     {DestDivReg, DestRemReg},
+                     {FirstInst->getOperand(1).getReg(),
+                      FirstInst->getOperand(2).getReg()});
+  MI.eraseFromParent();
+  OtherMI->eraseFromParent();
+}
+
 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::G_BR)
     return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3148,6 +3148,9 @@
   }
   case G_SELECT:
     return lowerSelect(MI);
+  case G_SDIVREM:
+  case G_UDIVREM:
+    return lowerDIVREM(MI);
   }
 }
 
@@ -6341,3 +6344,19 @@
   MI.eraseFromParent();
   return Legalized;
 }
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
+  // Split DIVREM into individual instructions.
+  unsigned Opcode = MI.getOpcode();
+
+  MIRBuilder.buildInstr(
+      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
+                                        : TargetOpcode::G_UDIV,
+      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+  MIRBuilder.buildInstr(
+      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
+                                        : TargetOpcode::G_UREM,
+      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
+  MI.eraseFromParent();
+  return Legalized;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -45,6 +45,14 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # +# DEBUG-NEXT: G_SDIVREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: ..
imm index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_UDIVREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# # DEBUG-NEXT: G_AND (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir @@ -0,0 +1,224 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s + +# Post-legalizer should not generate divrem instruction. 
+--- +name: test_sdiv_srem +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_sdiv_srem + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... +--- +name: test_srem_sdiv +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_srem_sdiv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... 
+--- +name: test_udiv_urem +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_udiv_urem + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32) = G_UDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %rem:_(s32) = G_UREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... +--- +name: test_urem_udiv +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_urem_udiv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %rem:_(s32) = G_UREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %div:_(s32) = G_UDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... 
+--- +name: test_sdiv_srem_v2 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_sdiv_srem_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: %rem:_(<2 x s32>) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... 
+--- +name: test_udiv_urem_v2 +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_udiv_urem_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>) = G_UDIV %src1, %src2 + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: %rem:_(<2 x s32>) = G_UREM %src1, %src2 + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... 
+--- +name: test_sdiv_srem_extra_sdiv +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_sdiv_srem_extra_sdiv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: %div2:_(s32) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %ptr3:_(p1) = COPY $vgpr6_vgpr7 + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) +... 
+--- +name: test_sdiv_srem_extra_srem +tracksRegLiveness: true +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_sdiv_srem_extra_srem + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: %rem2:_(s32) = G_SREM %src1, %src2 + ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %ptr3:_(p1) = COPY $vgpr6_vgpr7 + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir @@ -0,0 +1,523 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: test_sdiv_srem +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_sdiv_srem + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... 
+--- +name: test_sdiv_srem_v2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_sdiv_srem_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... 
+--- +name: test_sdiv_srem_v4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK-LABEL: name: test_sdiv_srem_v4 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 + ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 + ; CHECK: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %ptr1:_(p1) = COPY $vgpr8_vgpr9 + %ptr2:_(p1) = COPY $vgpr10_vgpr11 + %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) +... 
+--- +name: test_srem_sdiv +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_srem_sdiv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... +--- +name: test_srem_sdiv_v2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_srem_sdiv_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), 
%ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... +--- +name: test_srem_sdiv_v4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK-LABEL: name: test_srem_sdiv_v4 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 + ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 + ; CHECK: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %ptr1:_(p1) = COPY $vgpr8_vgpr9 + %ptr2:_(p1) = COPY $vgpr10_vgpr11 + %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) +... 
+--- +name: test_udiv_urem +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_udiv_urem + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... +--- +name: test_udiv_urem_v2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_udiv_urem_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), 
%ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... +--- +name: test_udiv_urem_v4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK-LABEL: name: test_udiv_urem_v4 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 + ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 + ; CHECK: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %ptr1:_(p1) = COPY $vgpr8_vgpr9 + %ptr2:_(p1) = COPY $vgpr10_vgpr11 + %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) +... 
+--- +name: test_urem_udiv +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: test_urem_udiv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK: %src1:_(s32) = COPY $vgpr0 + ; CHECK: %src2:_(s32) = COPY $vgpr1 + ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + %src1:_(s32) = COPY $vgpr0 + %src2:_(s32) = COPY $vgpr1 + %ptr1:_(p1) = COPY $vgpr2_vgpr3 + %ptr2:_(p1) = COPY $vgpr4_vgpr5 + %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) +... +--- +name: test_urem_udiv_v2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK-LABEL: name: test_urem_udiv_v2 + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 + ; CHECK: %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 + ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 + ; CHECK: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %ptr1:_(p1) = COPY $vgpr4_vgpr5 + %ptr2:_(p1) = COPY $vgpr6_vgpr7 + %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) + G_STORE %div:_(<2 x s32>), 
%ptr2:_(p1) :: (store 8, addrspace 1, align 4) +... +--- +name: test_urem_udiv_v4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK-LABEL: name: test_urem_udiv_v4 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11 + ; CHECK: %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 + ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 + ; CHECK: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 + ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %ptr1:_(p1) = COPY $vgpr8_vgpr9 + %ptr2:_(p1) = COPY $vgpr10_vgpr11 + %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) + G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) +... 
+# Both sources are also used by volatile stores placed ahead of the
+# G_SDIV/G_SREM pair; the expected output still has a single G_SDIVREM,
+# emitted after those stores.
+---
+name: test_sdiv_srem_extra_use
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_sdiv_srem_extra_use
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = G_IMPLICIT_DEF
+    ; CHECK: %ptr2:_(p1) = G_IMPLICIT_DEF
+    ; CHECK: %ptr3:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr4:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: G_STORE %src1(s32), %ptr1(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    ; CHECK: G_STORE %src2(s32), %ptr2(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr3(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %rem(s32), %ptr4(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = G_IMPLICIT_DEF
+    %ptr2:_(p1) = G_IMPLICIT_DEF
+    %ptr3:_(p1) = COPY $vgpr2_vgpr3
+    %ptr4:_(p1) = COPY $vgpr4_vgpr5
+    G_STORE %src1:_(s32), %ptr1:_(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    G_STORE %src2:_(s32), %ptr2:_(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr4:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# A G_SDIV/G_SREM pair followed by a second, identical G_SDIV.
+---
+name: test_sdiv_srem_extra_sdiv
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
+    ; Combine the first sdiv/srem pair into sdivrem and retain the extra
+    ; sdiv instruction.
+    ; CHECK-LABEL: name: test_sdiv_srem_extra_sdiv
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7
+    ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    ; CHECK: %div2:_(s32) = G_SDIV %src1, %src2
+    ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %ptr3:_(p1) = COPY $vgpr6_vgpr7
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+    %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# A G_SDIV/G_SREM pair followed by a second, identical G_SREM.
+---
+name: test_sdiv_srem_extra_srem
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
+    ; Combine the first sdiv/srem pair into sdivrem and retain the extra
+    ; srem instruction.
+    ; CHECK-LABEL: name: test_sdiv_srem_extra_srem
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7
+    ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    ; CHECK: %rem2:_(s32) = G_SREM %src1, %src2
+    ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %ptr3:_(p1) = COPY $vgpr6_vgpr7
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# Negative tests: the div/rem pairs below are expected to stay uncombined.
+# The second source operand differs (%src2 for G_SDIV, %src3 for G_SREM), so
+# both instructions are expected to remain as they are.
+---
+name: test_sdiv_srem_different_src_opnd2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; CHECK-LABEL: name: test_sdiv_srem_different_src_opnd2
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %src3:_(s32) = COPY $vgpr2
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr3_vgpr4
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr5_vgpr6
+    ; CHECK: %div:_(s32) = G_SDIV %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: %rem:_(s32) = G_SREM %src1, %src3
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %src3:_(s32) = COPY $vgpr2
+    %ptr1:_(p1) = COPY $vgpr3_vgpr4
+    %ptr2:_(p1) = COPY $vgpr5_vgpr6
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src3:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# G_SREM uses the same two registers as G_SDIV but in swapped order; both
+# instructions are expected to remain as they are.
+---
+name: test_sdiv_srem_src_opnds_swapped
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_sdiv_srem_src_opnds_swapped
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32) = G_SDIV %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: %rem:_(s32) = G_SREM %src2, %src1
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src2:_(s32), %src1:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# Signedness mismatch (signed division, unsigned remainder); both
+# instructions are expected to remain as they are.
+---
+name: test_sdiv_urem
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_sdiv_urem
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32) = G_SDIV %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: %rem:_(s32) = G_UREM %src1, %src2
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# Signedness mismatch (unsigned division, signed remainder); both
+# instructions are expected to remain as they are.
+---
+name: test_udiv_srem
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_udiv_srem
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32) = G_UDIV %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: %rem:_(s32) = G_SREM %src1, %src2
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+# G_SDIV lives in bb.0 and the matching G_SREM in bb.1; both instructions are
+# expected to remain as they are, each in its own block.
+---
+name: test_sdiv_srem_different_blocks
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_sdiv_srem_different_blocks
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+  ; CHECK: %src1:_(s32) = COPY $vgpr0
+  ; CHECK: %src2:_(s32) = COPY $vgpr1
+  ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+  ; CHECK: %div:_(s32) = G_SDIV %src1, %src2
+  ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+  ; CHECK: S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: liveins: $vgpr4_vgpr5
+  ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+  ; CHECK: %rem:_(s32) = G_SREM %src1, %src2
+  ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    S_BRANCH %bb.1
+  bb.1:
+    liveins: $vgpr4_vgpr5
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -3105,6 +3105,68 @@
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
 
+// Test lowering of G_SDIVREM into G_SDIV and G_SREM
+TEST_F(AArch64GISelMITest, LowerSDIVREM) {
+  setUp();
+  if (!TM)
+    return;
+
+  // Legalization rules for this test: G_SDIVREM is lowered for s64.
+  DefineLegalizerInfo(
+      A, { getActionDefinitionsBuilder(G_SDIVREM).lowerFor({s64}); });
+
+  LLT S64{LLT::scalar(64)};
+
+  // Build G_SDIVREM with two s64 results from Copies[0] and Copies[1].
+  auto SDivrem =
+      B.buildInstr(TargetOpcode::G_SDIVREM, {S64, S64}, {Copies[0], Copies[1]});
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+  // Lower the instruction; the helper must report it as Legalized.
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.lower(*SDivrem, 0, S64));
+
+  const auto *CheckStr = R"(
+  CHECK: [[DIV:%[0-9]+]]:_(s64) = G_SDIV %0:_, %1:_
+  CHECK: [[REM:%[0-9]+]]:_(s64) = G_SREM %0:_, %1:_
+  )";
+
+  // Expect an s64 G_SDIV followed by an s64 G_SREM, both reading %0 and %1.
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
+// Test lowering of G_UDIVREM into G_UDIV and G_UREM
+TEST_F(AArch64GISelMITest, LowerUDIVREM) {
+  setUp();
+  if (!TM)
+    return;
+
+  // Legalization rules for this test: G_UDIVREM is lowered for s64.
+  DefineLegalizerInfo(
+      A, { getActionDefinitionsBuilder(G_UDIVREM).lowerFor({s64}); });
+
+  LLT S64{LLT::scalar(64)};
+
+  // Build G_UDIVREM with two s64 results from Copies[0] and Copies[1].
+  auto UDivrem =
+      B.buildInstr(TargetOpcode::G_UDIVREM, {S64, S64}, {Copies[0], Copies[1]});
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+  // Lower the instruction; the helper must report it as Legalized.
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.lower(*UDivrem, 0, S64));
+
+  const auto *CheckStr = R"(
+  CHECK: [[DIV:%[0-9]+]]:_(s64) = G_UDIV %0:_, %1:_
+  CHECK: [[REM:%[0-9]+]]:_(s64) = G_UREM %0:_, %1:_
+  )";
+
+  // Expect an s64 G_UDIV followed by an s64 G_UREM, both reading %0 and %1.
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
 // Test widening of G_UNMERGE_VALUES
 TEST_F(AArch64GISelMITest, WidenUnmerge) {
   setUp();