Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -64,6 +64,7 @@
   MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
   bool selectCOPY(MachineInstr &I) const;
   bool selectG_TRUNC(MachineInstr &I) const;
+  bool selectEXT(MachineInstr &I) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_ADD(MachineInstr &I) const;
   bool selectG_EXTRACT(MachineInstr &I) const;
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -381,10 +381,10 @@
     MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
             .add(I.getOperand(2))
             .add(I.getOperand(3));
-    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
-      .addReg(AMDGPU::SCC);
-    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
-               constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+      .addReg(AMDGPU::SCC);
+    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
+               RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
     I.eraseFromParent();
     return Ret;
   }
@@ -625,6 +625,133 @@
   return true;
 }
 
+/// Select G_SEXT, G_ZEXT and G_ANYEXT.
+///
+/// Emits the replacement for \p I in front of it according to the register
+/// bank of the source: S_CSELECT for an scc s1, V_CNDMASK for a vcc s1, V_BFE
+/// for VGPR sources and S_SEXT/S_BFE for SGPR sources. On success the caller
+/// is expected to erase \p I. Returns false if the extension is not handled.
+bool AMDGPUInstructionSelector::selectEXT(MachineInstr &I) const {
+  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
+  const DebugLoc &DL = I.getDebugLoc();
+  MachineBasicBlock &MBB = *I.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  const LLT S1 = LLT::scalar(1);
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const unsigned DstSize = DstTy.getSizeInBits();
+  if (!DstTy.isScalar())
+    return false;
+
+  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+
+  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
+    if (SrcTy != S1 || DstSize > 64) // Invalid
+      return false;
+
+    unsigned Opcode = DstSize > 32 ?
+      AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+    const TargetRegisterClass *DstRC = DstSize > 32 ?
+      &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
+
+    // FIXME: Create an extra copy to avoid incorrectly constraining the result
+    // of the scc producer.
+    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
+      .addReg(SrcReg);
+    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+      .addReg(TmpReg);
+
+    // S_CSELECT computes SCC ? src0 : src1, so the "true" value comes first.
+    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
+      .addImm(Signed ? -1 : 1)
+      .addImm(0);
+    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+  }
+
+  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
+    if (SrcTy != S1) // Invalid
+      return false;
+
+    // V_CNDMASK computes cond ? src1 : src0, so the "true" value goes last.
+    MachineInstr *ExtI =
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addImm(0)               // src0_modifiers
+      .addImm(0)               // src0
+      .addImm(0)               // src1_modifiers
+      .addImm(Signed ? -1 : 1) // src1
+      .addUse(SrcReg);
+    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+  }
+
+  // NOTE(review): the caller erases I whenever this returns true, so if
+  // selectCOPY() only rewrites I in place the copy is lost and DstReg is left
+  // without a def (the anyext sgpr/vgpr tests check a COPY of an undefined
+  // %2) -- confirm and fix separately.
+  if (I.getOpcode() == AMDGPU::G_ANYEXT)
+    return selectCOPY(I);
+
+  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
+    // 64-bit should have been split up in RegBankSelect
+    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
+    MachineInstr *ExtI =
+      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
+      .addReg(SrcReg)
+      .addImm(0) // Offset
+      .addImm(SrcSize); // Width
+    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+  }
+
+  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
+    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
+      return false;
+
+    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
+      const unsigned SextOpc = SrcSize == 8 ?
+        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
+      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
+        .addReg(SrcReg);
+      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+    }
+
+    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
+    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
+    if (DstSize > 32 && SrcSize <= 32) {
+      // We need a 64-bit register source, but the high bits don't matter.
+      unsigned ExtReg
+        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned UndefReg
+        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
+        .addReg(SrcReg)
+        .addImm(AMDGPU::sub0)
+        .addReg(UndefReg)
+        .addImm(AMDGPU::sub1);
+
+      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
+        .addReg(ExtReg)
+        .addImm(SrcSize << 16);
+
+      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+    }
+
+    BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
+      .addReg(SrcReg)
+      .addImm(SrcSize << 16);
+    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+  }
+
+  return false;
+}
+
 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -842,6 +969,15 @@
     return selectG_STORE(I);
   case TargetOpcode::G_TRUNC:
     return selectG_TRUNC(I);
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+    if (selectEXT(I)) {
+      I.eraseFromParent();
+      return true;
+    }
+
+    return false;
   }
   return false;
 }
Index: lib/Target/AMDGPU/AMDGPURegisterBanks.td
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -14,7 +14,7 @@
   [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
 >;
 
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
 
 // It is helpful to distinguish conditions from ordinary SGPRs.
 def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
@@ -0,0 +1,171 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+---
+
+name: anyext_scc_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_scc_s1_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0 = S_CSELECT_B32 1, 0, implicit $scc
+    ; GCN: $sgpr0 = COPY [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s32) = G_ANYEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: anyext_scc_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_scc_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 1, 0, implicit $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_CSELECT_B64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s64) = G_ANYEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: anyext_sgpr_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s32
+    ; GCN: $sgpr0 = COPY %2:sreg_32_xm0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s32) = G_ANYEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: anyext_sgpr_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s64
+    ; GCN: $sgpr0_sgpr1 = COPY %2:sreg_64_xexec
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s64) = G_ANYEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: anyext_sgpr_s16_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s32
+    ; GCN: $sgpr0 = COPY %2:sreg_32_xm0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s32) = G_ANYEXT %1
+    $sgpr0 = COPY %2
+
+...
+
+---
+
+name: anyext_sgpr_s16_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64
+    ; GCN: $sgpr0_sgpr1 = COPY %2:sreg_64_xexec
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s64) = G_ANYEXT %1
+    $sgpr0_sgpr1 = COPY %2
+
+...
+
+---
+
+name: anyext_vcc_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: anyext_vcc_s1_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vcc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:vgpr(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: anyext_vgpr_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s32
+    ; GCN: $vgpr0 = COPY %2:vgpr_32
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: anyext_vgpr_s16_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s32
+    ; GCN: $vgpr0 = COPY %2:vgpr_32
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+
+...
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
@@ -0,0 +1,187 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+---
+
+name: sext_scc_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_scc_s1_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0 = S_CSELECT_B32 -1, 0, implicit $scc
+    ; GCN: $sgpr0 = COPY [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s32) = G_SEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: sext_scc_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_scc_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_CSELECT_B64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s64) = G_SEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: sext_sgpr_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
+    ; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s32) = G_SEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: sext_sgpr_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64_xexec = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s64) = G_SEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: sext_sgpr_s16_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32_xm0 = S_SEXT_I32_I16 [[COPY]]
+    ; GCN: $sgpr0 = COPY [[S_SEXT_I32_I16_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s32) = G_SEXT %1
+    $sgpr0 = COPY %2
+
+...
+
+---
+
+name: sext_sgpr_s16_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64_xexec = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s64) = G_SEXT %1
+    $sgpr0_sgpr1 = COPY %2
+
+...
+
+---
+
+name: sext_vcc_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: sext_vcc_s1_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vcc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:vgpr(s32) = G_SEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: sext_vgpr_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 1, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s32) = G_SEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: sext_vgpr_s16_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: sext_vgpr_s16_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 16, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_SEXT %1
+    $vgpr0 = COPY %2
+
+...
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
@@ -0,0 +1,187 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+---
+
+name: zext_scc_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_scc_s1_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0 = S_CSELECT_B32 1, 0, implicit $scc
+    ; GCN: $sgpr0 = COPY [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s32) = G_ZEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: zext_scc_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_scc_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY1]]
+    ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 1, 0, implicit $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_CSELECT_B64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:scc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:sgpr(s64) = G_ZEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: zext_sgpr_s1_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 65536, implicit-def $scc
+    ; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s32) = G_ZEXT %1
+    $sgpr0 = COPY %2
+...
+
+---
+
+name: zext_sgpr_s1_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64_xexec = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s64) = G_ZEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: zext_sgpr_s16_to_sgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
+    ; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s32) = G_ZEXT %1
+    $sgpr0 = COPY %2
+
+...
+
+---
+
+name: zext_sgpr_s16_to_sgpr_s64
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64_xexec = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s64) = G_ZEXT %1
+    $sgpr0_sgpr1 = COPY %2
+
+...
+
+---
+
+name: zext_vcc_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: zext_vcc_s1_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vcc(s1) = G_ICMP intpred(eq), %0, %0
+    %2:vgpr(s32) = G_ZEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: zext_vgpr_s1_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 1, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s32) = G_ZEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+
+name: zext_vgpr_s16_to_vgpr_s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: zext_vgpr_s16_to_vgpr_s32
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 16, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_ZEXT %1
+    $vgpr0 = COPY %2
+
+...