Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -204,36 +204,67 @@ MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineOperand &ImmOp = I.getOperand(1); + + // The AMDGPU backend only supports Imm operands and not CImm or FPImm. + if (ImmOp.isFPImm()) { + const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); + ImmOp.ChangeToImmediate(Imm.getZExtValue()); + } else if (ImmOp.isCImm()) { + ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); + } + unsigned DstReg = I.getOperand(0).getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + unsigned Size; + bool IsSgpr; + const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); + if (RB) { + IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; + Size = MRI.getType(DstReg).getSizeInBits(); + } else { + const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); + IsSgpr = TRI.isSGPRClass(RC); + Size = RC->MC->getPhysRegSize() * 8; + } + + if (Size != 32 && Size != 64) + return false; + unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; if (Size == 32) { - I.setDesc(TII.get(AMDGPU::S_MOV_B32)); + I.setDesc(TII.get(Opcode)); + I.addImplicitDefUseOperands(*MF); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - assert(Size == 64); - DebugLoc DL = I.getDebugLoc(); - unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - const APInt &Imm = I.getOperand(1).getCImm()->getValue(); + const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : + &AMDGPU::VGPR_32RegClass; + unsigned LoReg = MRI.createVirtualRegister(RC); + unsigned HiReg = MRI.createVirtualRegister(RC); + const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg) + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) .addImm(Imm.trunc(32).getZExtValue()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg) + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) .addImm(Imm.ashr(32).getZExtValue()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + const MachineInstr *RS = + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); + const TargetRegisterClass *DstRC = + TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); + if (!DstRC) + return true; + return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); } static bool isConstant(const MachineInstr &MI) { @@ -484,6 +515,7 @@ case TargetOpcode::G_BITCAST: return selectCOPY(I); case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: return selectG_CONSTANT(I); case TargetOpcode::G_GEP: return selectG_GEP(I); Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -0,0 +1,61 @@ +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +--- | + define amdgpu_kernel void @constant(i32 addrspace(1)* %global0, i64 addrspace(1)* %global1) {ret void} +... +--- + +name: constant +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GCN-LABEL: name: constant + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + + ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1 + %2:sreg_32(s32) = G_CONSTANT i32 1 + + ; GCN: [[LO0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 + ; GCN: [[HI0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 + ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO0]], %subreg.sub0, [[HI0]], %subreg.sub1 + %3:sgpr(s64) = G_CONSTANT i64 4294967296 + + ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216 + %4:sgpr(s32) = G_FCONSTANT float 1.0 + + ; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 + ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248 + ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1 + %5:sgpr(s64) = G_FCONSTANT double 1.0 + + ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1 + %6:vgpr(s32) = G_CONSTANT i32 1 + + ; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 + ; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1 + ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1 + %7:vgpr(s64) = G_CONSTANT i64 4294967296 + + ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216 + %8:vgpr(s32) = G_FCONSTANT float 1.0 + + ; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 + ; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248 + ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1 + %9:vgpr(s64) = G_FCONSTANT double 1.0 + + G_STORE %2, %0 :: (volatile store 4 into %ir.global0) + G_STORE %4, %0 :: (volatile store 4 into %ir.global0) + G_STORE %6, %0 :: (volatile store 4 into %ir.global0) + G_STORE %8, %0 :: (volatile store 4 into %ir.global0) + G_STORE %3, %1 :: (volatile store 8 into %ir.global1) + G_STORE %5, %1 :: (volatile store 8 into %ir.global1) + G_STORE %7, %1 :: (volatile store 8 into %ir.global1) + G_STORE %9, %1 :: (volatile store 8 into %ir.global1) +... +--- Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -42,8 +42,8 @@ # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 # Max immediate for CI -# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 -# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 @@ -56,8 +56,8 @@ # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0 # Immediate overflow for CI -# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 -# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4 +# GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 @@ -74,8 +74,8 @@ # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 # Overflow 32-bit byte offset -# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 -# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0