Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -56,6 +56,12 @@
   bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const;
   bool selectG_LOAD(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;
+  unsigned getSALUOpcode(const MachineInstr &I) const;
+  bool selectSimpleSALU(MachineInstr &I) const;
+  unsigned getVALUOpcode(const MachineInstr &I) const;
+  bool selectSimpleVALU(MachineInstr &I) const;
+  bool selectSimple(MachineInstr &I) const;
+  bool selectImpl(MachineInstr &I) const;
 
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -407,6 +407,136 @@
   return Ret;
 }
 
+/// Return the SALU opcode that implements the generic instruction \p I, or
+/// AMDGPU::INSTRUCTION_LIST_END when there is none. Only G_OR with a 32-bit
+/// result is mapped.
+unsigned AMDGPUInstructionSelector::getSALUOpcode(const MachineInstr &I) const {
+  const MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+  const unsigned Size0 = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
+  switch (I.getOpcode()) {
+  default:
+    break;
+  case TargetOpcode::G_OR:
+    if (Size0 == 32)
+      return AMDGPU::S_OR_B32;
+    break;
+  }
+  return AMDGPU::INSTRUCTION_LIST_END;
+}
+
+/// Select \p I in place as a SALU instruction. Returns false without touching
+/// \p I when getSALUOpcode() has no opcode for it.
+bool AMDGPUInstructionSelector::selectSimpleSALU(MachineInstr &I) const {
+  const unsigned Opcode = getSALUOpcode(I);
+  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
+    return false;
+  I.setDesc(TII.get(Opcode));
+  // setDesc() only replaces the descriptor; the implicit operands of the new
+  // opcode (S_OR_B32 writes SCC) have to be added explicitly.
+  I.addImplicitDefUseOperands(*I.getParent()->getParent());
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+/// Return the VALU opcode that implements the generic instruction \p I, or
+/// AMDGPU::INSTRUCTION_LIST_END when there is none. Only G_OR with a 32-bit
+/// result is mapped.
+unsigned AMDGPUInstructionSelector::getVALUOpcode(const MachineInstr &I) const {
+  const MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+  const unsigned Size0 = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
+  switch (I.getOpcode()) {
+  default:
+    break;
+  case TargetOpcode::G_OR:
+    if (Size0 == 32)
+      return AMDGPU::V_OR_B32_e64;
+    break;
+  }
+  return AMDGPU::INSTRUCTION_LIST_END;
+}
+
+/// Map a register bank and a size in bits to a register class. The VGPR bank
+/// maps to the VGPR class of that size, any other bank to the SGPR class.
+/// Returns nullptr for sizes other than 32 and 64.
+static const TargetRegisterClass *getRegClassFromBank(const RegisterBank *Bank,
+                                                      unsigned Size) {
+  const bool IsVGPR = Bank->getID() == AMDGPU::VGPRRegBankID;
+  switch (Size) {
+  case 32:
+    return IsVGPR ? &AMDGPU::VGPR_32RegClass : &AMDGPU::SReg_32RegClass;
+  case 64:
+    return IsVGPR ? &AMDGPU::VReg_64RegClass : &AMDGPU::SReg_64RegClass;
+  default:
+    return nullptr;
+  }
+}
+
+/// Select \p I as a VALU instruction by building a replacement in front of it.
+/// On success \p I is erased and true is returned. On failure the replacement
+/// is removed again, \p I stays in the block and false is returned, so the
+/// caller never ends up holding a reference to an erased instruction.
+bool AMDGPUInstructionSelector::selectSimpleVALU(MachineInstr &I) const {
+  const unsigned Opcode = getVALUOpcode(I);
+  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
+    return false;
+
+  MachineBasicBlock *BB = I.getParent();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  const MCInstrDesc &Desc = TII.get(Opcode);
+  MachineInstrBuilder VALU =
+      BuildMI(*BB, &I, I.getDebugLoc(), Desc, I.getOperand(0).getReg());
+
+  // Drop the partially built instruction so that a failed selection does not
+  // leave a second definition of the result register behind.
+  auto Fail = [&VALU]() {
+    VALU->eraseFromParent();
+    return false;
+  };
+
+  for (unsigned i = 1, OpIdx = 1, e = Desc.NumOperands; i != e; ++i) {
+    // Descriptor operands without a register class consume no operand of I.
+    if (Desc.OpInfo[i].RegClass == -1)
+      continue;
+
+    if (OpIdx >= I.getNumOperands() || !I.getOperand(OpIdx).isReg())
+      return Fail();
+    const unsigned Reg = I.getOperand(OpIdx++).getReg();
+
+    if (!TRI.getRegClass(Desc.OpInfo[i].RegClass)->isAllocatable()) {
+      // constrainSelectedInstRegOperands() cannot handle non-allocatable
+      // register classes like VS_*, so constrain those registers manually.
+      const RegisterBank *OpBank = RBI.getRegBank(Reg, MRI, TRI);
+      if (!OpBank)
+        return Fail();
+      const TargetRegisterClass *ConstrainedRC =
+          getRegClassFromBank(OpBank, RBI.getSizeInBits(Reg, MRI, TRI));
+      if (!ConstrainedRC ||
+          !RBI.constrainGenericRegister(Reg, *ConstrainedRC, MRI))
+        return Fail();
+    }
+    VALU.addReg(Reg);
+  }
+
+  if (!constrainSelectedInstRegOperands(*VALU, TII, TRI, RBI))
+    return Fail();
+
+  I.eraseFromParent();
+  return true;
+}
+
+/// Select \p I with the SALU when its result is in the SGPR bank and with the
+/// VALU otherwise. Returns false if the result has no register bank.
+bool AMDGPUInstructionSelector::selectSimple(MachineInstr &I) const {
+  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+  const RegisterBank *OpBank =
+      RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
+  if (!OpBank)
+    return false;
+  if (OpBank->getID() == AMDGPU::SGPRRegBankID)
+    return selectSimpleSALU(I);
+
+  return selectSimpleVALU(I);
+}
+
 bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
 
@@ -416,6 +546,8 @@
   switch (I.getOpcode()) {
   default:
     break;
+  case TargetOpcode::G_OR:
+    return selectSimple(I);
   case TargetOpcode::G_ADD:
     return selectG_ADD(I);
   case TargetOpcode::G_CONSTANT:
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -0,0 +1,39 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define amdgpu_kernel void @or(i32 addrspace(1)* %global0) {ret void}
+...
+---
+
+name:            or
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: or
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:vgpr(s32) = COPY $vgpr0
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+
+    ; or ss
+    ; GCN: S_OR_B32 {{.*}}implicit-def $scc
+    %4:sgpr(s32) = G_OR %0, %1
+
+    ; or vs
+    ; GCN: V_OR_B32_e64
+    %5:vgpr(s32) = G_OR %2, %4
+
+    ; or sv
+    ; GCN: V_OR_B32_e64
+    %6:vgpr(s32) = G_OR %4, %5
+
+    ; or vv
+    ; GCN: V_OR_B32_e64
+    %7:vgpr(s32) = G_OR %6, %2
+
+    G_STORE %7, %3 :: (store 4 into %ir.global0)
+...
+---