Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -47,8 +47,9 @@ bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, bool Signed) const; - bool legalizeBRCOND(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; + }; } // End llvm namespace. #endif Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -147,7 +147,7 @@ LocalPtr, PrivatePtr }; - setAction({G_BRCOND, S1}, Custom); + setAction({G_BRCOND, S1}, Legal); // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more // elements for v3s16 @@ -732,8 +732,6 @@ return legalizeITOFP(MI, MRI, MIRBuilder, true); case TargetOpcode::G_UITOFP: return legalizeITOFP(MI, MRI, MIRBuilder, false); - case TargetOpcode::G_BRCOND: - return legalizeBRCOND(MI, MRI, MIRBuilder); default: return false; } @@ -1041,46 +1039,66 @@ return true; } -bool AMDGPULegalizerInfo::legalizeBRCOND(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - // TODO: Maybe it would be better to represent this as an exec reg bank only - // produced by the control flow mask intrinsics apply this during - // regbankselect, instead of hoping we can always find the intrinsic defining - // the condition. TODO: Could we lower this on the intrinsic, and replace the - // branch opcode and see if the use/def mismatch? - Register CondReg = MI.getOperand(0).getReg(); - MachineBasicBlock *DestBB = MI.getOperand(1).getMBB(); - - // FIXME: Should we be trying to figure out if the condition was negated like - // in SelectionDAG? 
- MachineInstr *CondDef = getOpcodeDef(AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS, - CondReg, MRI); - if (!CondDef) - return true; +// Return the use branch instruction, otherwise null if the usage is invalid. +static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI) { + Register CondDef = MI.getOperand(0).getReg(); + if (!MRI.hasOneNonDBGUse(CondDef)) + return nullptr; - B.setInstr(MI); + MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef); + return UseMI.getParent() == MI.getParent() && + UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr; +} - // FIXME: Probably can just directly emit some of the correct exec - // modifications here and get rid of SILowerControlFlow. - switch (CondDef->getOperand(CondDef->getNumExplicitDefs()).getIntrinsicID()) { - case Intrinsic::amdgcn_if: - B.buildInstr(AMDGPU::SI_IF) - .addDef(CondDef->getOperand(1).getReg()) - .addUse(CondDef->getOperand(3).getReg()) - .addMBB(DestBB); - CondDef->eraseFromParent(); - break; - case Intrinsic::amdgcn_loop: - B.buildInstr(AMDGPU::SI_LOOP) - .addUse(CondDef->getOperand(2).getReg()) - .addMBB(DestBB); - CondDef->eraseFromParent(); - break; +bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + // Replace the use G_BRCOND with the exec manipulate and branch pseudos. 
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + case Intrinsic::amdgcn_if: { + if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { + const SIRegisterInfo *TRI + = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); + + B.setInstr(*BrCond); + Register Def = MI.getOperand(1).getReg(); + Register Use = MI.getOperand(3).getReg(); + B.buildInstr(AMDGPU::SI_IF) + .addDef(Def) + .addUse(Use) + .addMBB(BrCond->getOperand(1).getMBB()); + + MRI.setRegClass(Def, TRI->getWaveMaskRegClass()); + MRI.setRegClass(Use, TRI->getWaveMaskRegClass()); + MI.eraseFromParent(); + BrCond->eraseFromParent(); + return true; + } + + return false; + } + case Intrinsic::amdgcn_loop: { + if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { + const SIRegisterInfo *TRI + = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); + + B.setInstr(*BrCond); + Register Reg = MI.getOperand(2).getReg(); + B.buildInstr(AMDGPU::SI_LOOP) + .addUse(Reg) + .addMBB(BrCond->getOperand(1).getMBB()); + MI.eraseFromParent(); + BrCond->eraseFromParent(); + MRI.setRegClass(Reg, TRI->getWaveMaskRegClass()); + return true; + } + + return false; + } default: return true; } - MI.eraseFromParent(); return true; } Index: test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir @@ -0,0 +1,73 @@ +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s + +# Make sure incorrect usage of control flow intrinsics fails to select in case some transform separated the intrinsic from its branch. 
+ +# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block) +# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user) +# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user) +# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_condition) + + +--- +name: brcond_si_if_different_block +body: | + bb.0: + successors: %bb.1 + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + + bb.1: + G_BRCOND %3, %bb.1 + +... + +--- +name: si_if_not_brcond_user +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(s32) = G_SELECT %3, %0, %1 + S_ENDPGM 0, implicit %5 + +... + +--- +name: si_if_multi_user +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(s32) = G_SELECT %3, %0, %1 + G_BRCOND %3, %bb.1 + + bb.1: + S_ENDPGM 0, implicit %5 + +... 
+ +--- +name: si_if_not_condition +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2 + %5:_(s1) = G_CONSTANT i1 true + %6:_(s1) = G_XOR %3, %5 + G_BRCOND %6, %bb.1 + + bb.1: + +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir @@ -1,18 +1,27 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s --- -name: legal_brcond +name: legal_brcond_vcc body: | - ; CHECK-LABEL: name: legal_brcond - ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: bb.1: - bb.0.entry: + ; WAVE64-LABEL: name: legal_brcond_vcc + ; WAVE64: bb.0: + ; WAVE64: successors: %bb.1(0x80000000) + ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; WAVE64: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; WAVE64: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE64: bb.1: + ; WAVE32-LABEL: name: legal_brcond_vcc + ; WAVE32: bb.0: + ; WAVE32: successors: %bb.1(0x80000000) + ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 + ; WAVE32: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; WAVE32: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE32: bb.1: + bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 %0:_(s32) = COPY $vgpr0 @@ -28,14 +37,22 @@ name: legal_brcond_scc body: | - ; CHECK-LABEL: name: legal_brcond_scc - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: bb.1: + ; WAVE64-LABEL: name: legal_brcond_scc + ; WAVE64: bb.0: + ; WAVE64: successors: %bb.1(0x80000000) + ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; WAVE64: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; WAVE64: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE64: bb.1: + ; WAVE32-LABEL: name: legal_brcond_scc + ; WAVE32: bb.0: + ; WAVE32: successors: %bb.1(0x80000000) + ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; WAVE32: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; WAVE32: G_BRCOND [[ICMP]](s1), %bb.1 + ; WAVE32: bb.1: bb.0: liveins: $sgpr0, $sgpr1 @@ -51,14 +68,22 @@ --- name: brcond_si_if body: | - ; CHECK-LABEL: name: brcond_si_if - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: [[SI_IF:%[0-9]+]]:_(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: bb.1: + ; WAVE64-LABEL: name: brcond_si_if + ; WAVE64: bb.0: + ; WAVE64: successors: %bb.1(0x80000000) + ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 + ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64: bb.1: + ; WAVE32-LABEL: name: brcond_si_if + ; WAVE32: bb.0: + ; WAVE32: successors: %bb.1(0x80000000) + ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32: bb.1: bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1 @@ -74,14 +99,22 @@ --- name: brcond_si_loop body: | - ; CHECK-LABEL: name: brcond_si_loop - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 - ; CHECK: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: bb.1: + ; WAVE64-LABEL: name: brcond_si_loop + ; WAVE64: bb.0: + ; WAVE64: successors: %bb.1(0x80000000) + ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE64: bb.1: + ; WAVE32-LABEL: name: brcond_si_loop + ; WAVE32: bb.0: + ; WAVE32: successors: %bb.1(0x80000000) + ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1 + ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; WAVE32: bb.1: bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1, 
$sgpr0_sgpr1