Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2625,23 +2625,42 @@
   return true;
 }
 
+// Check that this is a G_XOR x, -1
+static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
+  if (MI.getOpcode() != TargetOpcode::G_XOR)
+    return false;
+  auto ConstVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+  return ConstVal && *ConstVal == -1;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
-static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
-                                       MachineRegisterInfo &MRI,
-                                       MachineInstr *&Br,
-                                       MachineBasicBlock *&UncondBrTarget) {
+static MachineInstr *
+verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
+                  MachineBasicBlock *&UncondBrTarget, bool &Negated) {
   Register CondDef = MI.getOperand(0).getReg();
   if (!MRI.hasOneNonDBGUse(CondDef))
     return nullptr;
 
   MachineBasicBlock *Parent = MI.getParent();
-  MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
-  if (UseMI.getParent() != Parent ||
-      UseMI.getOpcode() != AMDGPU::G_BRCOND)
+  MachineInstr *UseMI = &*MRI.use_instr_nodbg_begin(CondDef);
+
+  if (isNot(MRI, *UseMI)) {
+    Register NegatedCond = UseMI->getOperand(0).getReg();
+    if (!MRI.hasOneNonDBGUse(NegatedCond))
+      return nullptr;
+
+    // We're deleting the def of this value, so we need to remove it.
+    UseMI->eraseFromParent();
+
+    UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
+    Negated = true;
+  }
+
+  if (UseMI->getParent() != Parent || UseMI->getOpcode() != AMDGPU::G_BRCOND)
     return nullptr;
 
   // Make sure the cond br is followed by a G_BR, or is the last instruction.
-  MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
+  MachineBasicBlock::iterator Next = std::next(UseMI->getIterator());
   if (Next == Parent->end()) {
     MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
     if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
@@ -2654,7 +2673,7 @@
     UncondBrTarget = Br->getOperand(0).getMBB();
   }
 
-  return &UseMI;
+  return UseMI;
 }
 
 bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
@@ -4467,7 +4486,9 @@
   case Intrinsic::amdgcn_else: {
     MachineInstr *Br = nullptr;
     MachineBasicBlock *UncondBrTarget = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+    bool Negated = false;
+    if (MachineInstr *BrCond =
+            verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
       const SIRegisterInfo *TRI
         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
@@ -4475,6 +4496,10 @@
       Register Use = MI.getOperand(3).getReg();
 
       MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
+
+      if (Negated)
+        std::swap(CondBrTarget, UncondBrTarget);
+
       B.setInsertPt(B.getMBB(), BrCond->getIterator());
       if (IntrID == Intrinsic::amdgcn_if) {
         B.buildInstr(AMDGPU::SI_IF)
@@ -4510,13 +4535,18 @@
   case Intrinsic::amdgcn_loop: {
     MachineInstr *Br = nullptr;
     MachineBasicBlock *UncondBrTarget = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+    bool Negated = false;
+    if (MachineInstr *BrCond =
+            verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
       const SIRegisterInfo *TRI
         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
       MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
       Register Reg = MI.getOperand(2).getReg();
 
+      if (Negated)
+        std::swap(CondBrTarget, UncondBrTarget);
+
       B.setInsertPt(B.getMBB(), BrCond->getIterator());
       B.buildInstr(AMDGPU::SI_LOOP)
         .addUse(Reg)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if-invalid.mir
@@ -5,7 +5,9 @@
 # ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
 # ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
 # ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_condition)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
 
 ---
@@ -55,10 +57,63 @@
 ...
 
+# Make sure we only match G_XOR (if), -1
 ---
-name: si_if_not_condition
+name: brcond_si_if_xor_0
 body: |
   bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    %5:_(s1) = G_CONSTANT i1 false
+    %6:_(s1) = G_XOR %3, %5
+    G_BRCOND %6, %bb.2
+    G_BR %bb.3
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    S_NOP 1
+
+  bb.3:
+    S_NOP 2
+...
+
+# Make sure we only match G_XOR (if), -1
+---
+name: brcond_si_if_or_neg1
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    %5:_(s1) = G_CONSTANT i1 true
+    %6:_(s1) = G_OR %3, %5
+    G_BRCOND %6, %bb.2
+    G_BR %bb.3
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    S_NOP 1
+
+  bb.3:
+    S_NOP 2
+...
+
+---
+name: brcond_si_if_negated_multi_use
+body: |
+  bb.0:
+    successors: %bb.1
     liveins: $vgpr0, $vgpr1
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
@@ -66,8 +121,16 @@
     %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
     %5:_(s1) = G_CONSTANT i1 true
     %6:_(s1) = G_XOR %3, %5
-    G_BRCOND %6, %bb.1
+    S_NOP 0, implicit %6
+    G_BRCOND %6, %bb.2
+    G_BR %bb.3
 
   bb.1:
+    S_NOP 0
+
+  bb.2:
+    S_NOP 1
+  bb.3:
+    S_NOP 2
 
 ...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
@@ -19,3 +19,4 @@
     %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
     G_BRCOND %3, %bb.1
 ...
+
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -406,3 +406,216 @@
   bb.2:
     S_NOP 0
 ...
+
+---
+name: brcond_si_if_negated
+body: |
+  ; WAVE64-LABEL: name: brcond_si_if_negated
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.2(0x80000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: bb.2:
+  ; WAVE64: S_NOP 1
+  ; WAVE32-LABEL: name: brcond_si_if_negated
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.2(0x80000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: bb.2:
+  ; WAVE32: S_NOP 1
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    %5:_(s1) = G_CONSTANT i1 true
+    %6:_(s1) = G_XOR %3, %5
+    G_BRCOND %6, %bb.2
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    S_NOP 1
+...
+
+---
+name: brcond_si_if_br_negated
+body: |
+  ; WAVE64-LABEL: name: brcond_si_if_br_negated
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.3
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.2(0x80000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: bb.2:
+  ; WAVE64: successors: %bb.3(0x80000000)
+  ; WAVE64: S_NOP 1
+  ; WAVE64: bb.3:
+  ; WAVE64: S_NOP 2
+  ; WAVE32-LABEL: name: brcond_si_if_br_negated
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.3
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.2(0x80000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: bb.2:
+  ; WAVE32: successors: %bb.3(0x80000000)
+  ; WAVE32: S_NOP 1
+  ; WAVE32: bb.3:
+  ; WAVE32: S_NOP 2
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    %5:_(s1) = G_CONSTANT i1 true
+    %6:_(s1) = G_XOR %3, %5
+    G_BRCOND %6, %bb.2
+    G_BR %bb.3
+
+  bb.1:
+    S_NOP 0
+
+  bb.2:
+    S_NOP 1
+
+  bb.3:
+    S_NOP 2
+...
+
+---
+name: brcond_si_loop_brcond_negated
+tracksRegLiveness: true
+body: |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_negated
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.2
+  ; WAVE64: bb.2:
+  ; WAVE64: S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_negated
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.2
+  ; WAVE32: bb.2:
+  ; WAVE32: S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    %4:_(s1) = G_CONSTANT i1 true
+    %5:_(s1) = G_XOR %3, %4
+    G_BRCOND %5, %bb.1
+
+  bb.2:
+    S_NOP 0
+...
+
+---
+name: brcond_si_loop_brcond_br_negated
+tracksRegLiveness: true
+body: |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_br_negated
+  ; WAVE64: bb.0:
+  ; WAVE64: successors: %bb.1(0x80000000)
+  ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64: S_NOP 0
+  ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64: G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE64: S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_br_negated
+  ; WAVE32: bb.0:
+  ; WAVE32: successors: %bb.1(0x80000000)
+  ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32: S_NOP 0
+  ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32: G_BR %bb.1
+  ; WAVE32: bb.2:
+  ; WAVE32: S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    %4:_(s1) = G_CONSTANT i1 true
+    %5:_(s1) = G_XOR %3, %4
+    G_BRCOND %5, %bb.2
+    G_BR %bb.1
+
+  bb.2:
+    S_NOP 0
+...