Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -827,11 +827,7 @@ const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isReg()) { if (unsigned SubReg = MO.getSubReg()) { - assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg( - MI.getParent()->getParent()->getRegInfo(). - getRegClass(MO.getReg()), SubReg)) >= 32 && - "Sub-dword subregs are not supported"); - return RI.getNumChannelsFromSubReg(SubReg) * 4; + return RI.getSubRegIdxSize(SubReg) / 8; } } return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2509,15 +2509,41 @@ unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { - bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg()); + Register DstReg = UseMI.getOperand(0).getReg(); + Register SrcReg = UseMI.getOperand(1).getReg(); + bool Is16Bit = getOpSize(UseMI, 0) == 2; + bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; - if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) { - if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32)) + APInt Imm(32, ImmOp->getImm()); + + if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16) + Imm = Imm.ashr(16); + + if (RI.isAGPR(*MRI, DstReg)) { + if (!isInlineConstant(Imm)) return false; NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32; } + + if (Is16Bit) { + if (isVGPRCopy) + return false; // Do not clobber vgpr_hi16 + + if (DstReg.isVirtual() && + UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) + return false; + + UseMI.getOperand(0).setSubReg(0); + if (DstReg.isPhysical()) { + DstReg = RI.get32BitRegister(DstReg); + UseMI.getOperand(0).setReg(DstReg); + } + assert(SrcReg.isVirtual()); + } + UseMI.setDesc(get(NewOpc)); - UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm()); + UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); + UseMI.getOperand(1).setTargetFlags(0); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } Index: llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir @@ -0,0 +1,257 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: fold_simm_16_sub_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG [[COPY]] + %0:sreg_32 = S_MOV_B32 2048 + %1:sgpr_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:sreg_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: $sgpr0 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_32 = S_MOV_B32 2048 + $sgpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... + +--- +name: fold_aimm_16_sub_to_sub_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:agpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_0 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 0 + %1.lo16:agpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16 + %0:sreg_32 = S_MOV_B32 0 + $agpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $agpr0_lo16 + +... + +--- +name: fold_vimm_16_sub_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG [[COPY]] + %0:sreg_32 = S_MOV_B32 2048 + %1:vgpr_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_vimm_16_sub_to_sub +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_sub + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:vgpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_vimm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16 + %0:sreg_32 = S_MOV_B32 2048 + $vgpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $vgpr0_lo16 + +... + +--- +name: fold_vimm_16_lo_to_hi +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_lo_to_hi + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.hi16:vgpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_vimm_16_hi_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_hi_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:vgpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_lo_to_hi +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.hi16:sreg_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_hi_to_lo_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:sreg_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 134217728 + %1.lo16:sreg_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 65536 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 + ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 134217728 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +...