Index: llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -95,6 +95,11 @@ MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const; + bool isVreg1(unsigned Reg) const { + return TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass; + } + bool isLaneMaskReg(unsigned Reg) const { return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) && TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) == @@ -494,13 +499,10 @@ unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass) + if (!isVreg1(SrcReg)) continue; - if (isLaneMaskReg(DstReg) || - (TargetRegisterInfo::isVirtualRegister(DstReg) && - MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass)) + if (isLaneMaskReg(DstReg) || isVreg1(DstReg)) continue; // Copy into a 32-bit vector register. @@ -543,7 +545,7 @@ for (MachineInstr &MI : MBB.phis()) { unsigned DstReg = MI.getOperand(0).getReg(); - if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass) + if (!isVreg1(DstReg)) continue; LLVM_DEBUG(dbgs() << "Lower PHI: " << MI); @@ -560,7 +562,7 @@ if (IncomingDef->getOpcode() == AMDGPU::COPY) { IncomingReg = IncomingDef->getOperand(1).getReg(); - assert(isLaneMaskReg(IncomingReg)); + assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg)); assert(!IncomingDef->getOperand(1).getSubReg()); } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) { continue; @@ -668,8 +670,7 @@ continue; unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg) || - MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass) + if (!isVreg1(DstReg)) continue; if (MRI->use_empty(DstReg)) { @@ -689,7 +690,7 @@ assert(!MI.getOperand(1).getSubReg()); if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !isLaneMaskReg(SrcReg)) { + (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) { assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32); unsigned TmpReg = createLaneMaskReg(*MF); BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg) Index: llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir +++ llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s + +# The strange block ordering visits the use before the def. +--- +name: inserted_cmp_operand_class_rpo +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: inserted_cmp_operand_class_rpo + ; CHECK: bb.0: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: S_BRANCH %bb.3 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %1 + ; CHECK: bb.2: + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]] + ; CHECK: S_ENDPGM 0 + ; CHECK: bb.3: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_1]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[V_CMP_EQ_U32_e64_]] + ; CHECK: S_BRANCH %bb.1 + bb.0: + successors: %bb.3 + + S_BRANCH %bb.3 + + bb.1: + successors: %bb.2 + + %0:vreg_1 = COPY %1 + + bb.2: + %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %3:sreg_64_xexec = COPY %0 + S_ENDPGM 0 + + bb.3: + successors: %bb.1 + + %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %5:sreg_32_xm0 = S_MOV_B32 0 + %6:sreg_64 = V_CMP_EQ_U32_e64 killed %4, killed %5, implicit $exec + %1:vreg_1 = COPY %6 + S_BRANCH %bb.1