Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -827,11 +827,7 @@ const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isReg()) { if (unsigned SubReg = MO.getSubReg()) { - assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg( - MI.getParent()->getParent()->getRegInfo(). - getRegClass(MO.getReg()), SubReg)) >= 32 && - "Sub-dword subregs are not supported"); - return RI.getNumChannelsFromSubReg(SubReg) * 4; + return RI.getSubRegIdxSize(SubReg) / 8; } } return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2509,13 +2509,35 @@ unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { - bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg()); + Register DstReg = UseMI.getOperand(0).getReg(); + Register SrcReg = UseMI.getOperand(1).getReg(); + bool Is16Bit = getOpSize(UseMI, 0) == 2 || getOpSize(UseMI, 1) == 2; + bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; - if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) { + + if (RI.isAGPR(*MRI, DstReg)) { if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32)) return false; NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32; } + + if (Is16Bit) { + if (isVGPRCopy) + return false; // Do not clobber vgpr_hi16 + + if (DstReg.isVirtual() && + UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) + return false; + + UseMI.getOperand(0).setSubReg(0); + UseMI.getOperand(1).setSubReg(0); + if (DstReg.isPhysical()) { + DstReg = RI.get32BitRegister(DstReg); + UseMI.getOperand(0).setReg(DstReg); + } + assert(SrcReg.isVirtual()); + } + UseMI.setDesc(get(NewOpc)); UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm()); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); Index: llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir @@ -0,0 +1,109 @@ +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: fold_simm_16_sub_to_lo +# GCN: %1:sgpr_lo16 = COPY killed %0.lo16 +--- +name: fold_simm_16_sub_to_lo +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + %1:sgpr_lo16 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_simm_16_sub_to_sub +# GCN: %1:sreg_32 = S_MOV_B32 2048 +--- +name: fold_simm_16_sub_to_sub +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:sreg_32 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_simm_16_sub_to_phys +# GCN: $sgpr0 = S_MOV_B32 2048 +--- +name: fold_simm_16_sub_to_phys +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + $sgpr0_lo16 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048 +# GCN: %1.lo16:agpr_32 = COPY killed %0.lo16 +--- +name: fold_aimm_16_sub_to_sub_2048 +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:agpr_32 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_aimm_16_sub_to_sub_0 +# GCN: %1:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec +--- +name: fold_aimm_16_sub_to_sub_0 +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 0 + %1.lo16:agpr_32 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_aimm_16_sub_to_phys +# GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec +--- +name: fold_aimm_16_sub_to_phys +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 0 + $agpr0_lo16 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_vimm_16_sub_to_lo +# GCN: %1:vgpr_lo16 = COPY killed %0.lo16 +--- +name: fold_vimm_16_sub_to_lo +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + %1:vgpr_lo16 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_vimm_16_sub_to_sub +# GCN: %1.lo16:vgpr_32 = COPY killed %0.lo16 +--- +name: fold_vimm_16_sub_to_sub +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:vgpr_32 = COPY killed %0.lo16 + +... + +# GCN-LABEL: name: fold_vimm_16_sub_to_phys +# GCN: $vgpr0_lo16 = COPY killed %0.lo16 +--- +name: fold_vimm_16_sub_to_phys +body: | + bb.0: + + %0:sreg_32 = S_MOV_B32 2048 + $vgpr0_lo16 = COPY killed %0.lo16 + +...