diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -156,6 +156,11 @@
   bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
   bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
 
+  /// Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM
+  /// when both source operands are identical and both instructions are in
+  /// the same basic block.
+  bool tryCombineDivRem(MachineInstr &MI);
+
   /// If a brcond's true block is not the fallthrough, make it so by inverting
   /// the condition and swapping operands.
   bool matchOptBrCondByInvertingCond(MachineInstr &MI);
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -243,6 +243,12 @@
 // Generic unsigned remainder instruction.
 HANDLE_TARGET_OPCODE(G_UREM)
 
+// Generic signed divrem instruction.
+HANDLE_TARGET_OPCODE(G_SDIVREM)
+
+// Generic unsigned divrem instruction.
+HANDLE_TARGET_OPCODE(G_UDIVREM)
+
 /// Generic bitwise and instruction.
 HANDLE_TARGET_OPCODE(G_AND)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -285,6 +285,22 @@
   let isCommutable = false;
 }
 
+// Generic signed division and remainder.
+def G_SDIVREM : GenericInstruction {
+  let OutOperandList = (outs type0:$div, type0:$rem);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = false;
+  let isCommutable = false;
+}
+
+// Generic unsigned division and remainder.
+def G_UDIVREM : GenericInstruction {
+  let OutOperandList = (outs type0:$div, type0:$rem);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = false;
+  let isCommutable = false;
+}
+
 // Generic bitwise and.
 def G_AND : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -941,6 +941,73 @@
   LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
 }
 
+bool CombinerHelper::tryCombineDivRem(MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode != TargetOpcode::G_SDIV && Opcode != TargetOpcode::G_SREM &&
+      Opcode != TargetOpcode::G_UDIV && Opcode != TargetOpcode::G_UREM)
+    return false;
+
+  bool IsDiv = Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV;
+  bool IsSigned =
+      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
+  Register Src1 = MI.getOperand(1).getReg();
+
+  // MI is paired with its counterpart of the same signedness: a division with
+  // a remainder and vice versa.
+  unsigned OtherOpcode;
+  if (IsDiv)
+    OtherOpcode = IsSigned ? TargetOpcode::G_SREM : TargetOpcode::G_UREM;
+  else
+    OtherOpcode = IsSigned ? TargetOpcode::G_SDIV : TargetOpcode::G_UDIV;
+
+  // Combine, in either order:
+  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
+  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
+  // into:
+  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
+
+  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
+    // Stay within one basic block: a merged instruction placed in one block
+    // is not guaranteed to dominate the users of the result that used to be
+    // defined in another block.
+    if (UseMI.getParent() != MI.getParent() ||
+        UseMI.getOpcode() != OtherOpcode)
+      continue;
+
+    // UseMI may use Src1 only as its divisor, so compare both operands
+    // explicitly instead of assuming that operand 1 is Src1.
+    if (!matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1)) ||
+        !matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)))
+      continue;
+
+    // Find whichever of the two instructions comes first in the block. The
+    // merged instruction takes its place and its operands, so that neither
+    // the sources nor the results end up being used before they are defined.
+    MachineInstr *FirstMI = &MI;
+    for (MachineInstr &I : *MI.getParent()) {
+      if (&I == &MI || &I == &UseMI) {
+        FirstMI = &I;
+        break;
+      }
+    }
+
+    Register DestDivReg = (IsDiv ? MI : UseMI).getOperand(0).getReg();
+    Register DestRemReg = (IsDiv ? UseMI : MI).getOperand(0).getReg();
+
+    MachineIRBuilder MIRBuilder(*FirstMI);
+    MIRBuilder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
+                                   : TargetOpcode::G_UDIVREM,
+                          {DestDivReg, DestRemReg},
+                          {FirstMI->getOperand(1).getReg(),
+                           FirstMI->getOperand(2).getReg()});
+    MI.eraseFromParent();
+    UseMI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::G_BR)
     return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -69,6 +69,11 @@
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_SDIV:
+  case TargetOpcode::G_SREM:
+  case TargetOpcode::G_UDIV:
+  case TargetOpcode::G_UREM:
+    return Helper.tryCombineDivRem(MI);
   }
 
   return false;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -45,6 +45,14 @@
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
+# DEBUG-NEXT: G_SDIVREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+#
+# DEBUG-NEXT: G_UDIVREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+#
 # DEBUG-NEXT: G_AND (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: test_sdiv_srem
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_sdiv_srem
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+---
+name: test_srem_sdiv
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_srem_sdiv
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2
+    ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+---
+name: test_udiv_urem
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_udiv_urem
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2
+    ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...
+---
+name: test_urem_udiv
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: test_urem_udiv
+    ; CHECK: %src1:_(s32) = COPY $vgpr0
+    ; CHECK: %src2:_(s32) = COPY $vgpr1
+    ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2
+    ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1)
+    ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1)
+    %src1:_(s32) = COPY $vgpr0
+    %src2:_(s32) = COPY $vgpr1
+    %ptr1:_(p1) = COPY $vgpr2_vgpr3
+    %ptr2:_(p1) = COPY $vgpr4_vgpr5
+    %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32)
+    G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4)
+    %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32)
+    G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4)
+...