Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -362,8 +362,10 @@ def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv; -def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">, +def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastFPImm">, GISDNodeXFormEquiv; +def gi_bitcast_fpimm_to_i64 : GICustomOperandRenderer<"renderBitcastFPImm">, + GISDNodeXFormEquiv; def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">, GISDNodeXFormEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -314,8 +314,8 @@ void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; - void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI, - int OpIdx) const; + void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2440,10 +2440,18 @@ } bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { + if (selectImpl(I, *CoverageInfo)) + return true; + + // FIXME: Relying on manual selection for 64-bit case, and pointer typed + // constants. MachineBasicBlock *BB = I.getParent(); MachineOperand &ImmOp = I.getOperand(1); Register DstReg = I.getOperand(0).getReg(); - unsigned Size = MRI->getType(DstReg).getSizeInBits(); + LLT Ty = MRI->getType(DstReg); + unsigned Size = Ty.getSizeInBits(); + assert((Size == 64 || Ty.isPointer()) && + "patterns should have selected this"); // The AMDGPU backend only supports Imm operands and not CImm or FPImm. if (ImmOp.isFPImm()) { @@ -2457,19 +2465,7 @@ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID; - - unsigned Opcode; - if (DstRB->getID() == AMDGPU::VCCRegBankID) { - Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - } else { - Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - - // We should never produce s1 values on banks other than VCC. If the user of - // this already constrained the register, we may incorrectly think it's VCC - // if it wasn't originally. - if (Size == 1) - return false; - } + unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; if (Size != 64) { I.setDesc(TII.get(Opcode)); @@ -2478,7 +2474,6 @@ } const DebugLoc &DL = I.getDebugLoc(); - APInt Imm(Size, I.getOperand(1).getImm()); MachineInstr *ResInst; @@ -5063,18 +5058,12 @@ MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); } -void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, - const MachineInstr &MI, - int OpIdx) const { - assert(OpIdx == -1); - +void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { const MachineOperand &Op = MI.getOperand(1); - if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) - MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); - else { - assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); - MIB.addImm(Op.getCImm()->getSExtValue()); - } + assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1); + MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); } void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB, Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -860,7 +860,9 @@ class VGPRImm : PatLeaf; +}]> { + let GISelPredicateCode = [{return true;}]; +} def NegateImm : SDNodeXFormgetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1898,6 +1898,12 @@ /********** Immediate Patterns **********/ /********** ================== **********/ +def : GCNPat < + (i32 imm:$imm), + (S_MOV_B32 imm:$imm) +>; + +// FIXME: Remove VGPRImm def : GCNPat < (VGPRImm<(i32 imm)>:$imm), (V_MOV_B32_e32 imm:$imm) @@ -1908,11 +1914,6 @@ (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) >; -def : GCNPat < - (i32 imm:$imm), - (S_MOV_B32 imm:$imm) ->; - def : GCNPat < (VGPRImm<(SIlds tglobaladdr:$ga)>), (V_MOV_B32_e32 $ga) @@ -1931,16 +1932,31 @@ (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) >; +def : GCNPat < + (VGPRImm<(i16 imm)>:$imm), + (V_MOV_B32_e32 imm:$imm) +>; + def : GCNPat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) >; +def : GCNPat < + (f32 fpimm:$imm), + (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) +>; + def : GCNPat < (f16 fpimm:$imm), (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) >; +def : GCNPat < + (f16 fpimm:$imm), + (V_MOV_B32_e32 (i32 (bitcast_fpimm_to_i32 $imm))) +>; + def : GCNPat < (p5 frameindex:$fi), (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi))) @@ -1956,28 +1972,22 @@ (S_MOV_B64 InlineImm64:$imm) >; -// XXX - Should this use a s_cmp to set SCC? +def : GCNPat < + (f64 InlineImmFP64:$imm), + (S_MOV_B64 (i64 (bitcast_fpimm_to_i64 $imm))) +>; // Set to sign-extended 64-bit value (true = -1, false = 0) -def : GCNPat < - (i1 imm:$imm), - (S_MOV_B64 (i64 (as_i64imm $imm))) -> { +def : GCNPat <(i1 imm:$imm), + (S_MOV_B64 imm:$imm)> { let WaveSizePredicate = isWave64; } -def : GCNPat < - (i1 imm:$imm), - (S_MOV_B32 (i32 (as_i32imm $imm))) -> { +def : GCNPat <(i1 imm:$imm), + (S_MOV_B32 imm:$imm)> { let WaveSizePredicate = isWave32; } -def : GCNPat < - (f64 InlineImmFP64:$imm), - (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineImmFP64:$imm))) ->; - /********** ================== **********/ /********** Intrinsic Patterns **********/ /********** ================== **********/ Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -342,6 +342,195 @@ S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 ... +--- +name: constant_s_p2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + + ; WAVE64-LABEL: name: constant_s_p2 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; + ; WAVE32-LABEL: name: constant_s_p2 + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + %0:sgpr(p2) = G_CONSTANT i32 0 + %1:sgpr(p2) = G_CONSTANT i32 1 + %2:sgpr(p2) = G_CONSTANT i32 -1 + %3:sgpr(p2) = G_CONSTANT i32 -54 + %4:sgpr(p2) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_v_p2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_v_p2 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; + ; WAVE32-LABEL: name: constant_v_p2 + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + %0:vgpr(p2) = G_CONSTANT i32 0 + %1:vgpr(p2) = G_CONSTANT i32 1 + %2:vgpr(p2) = G_CONSTANT i32 -1 + %3:vgpr(p2) = G_CONSTANT i32 -54 + %4:vgpr(p2) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_s_p5 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + + ; WAVE64-LABEL: name: constant_s_p5 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; + ; WAVE32-LABEL: name: constant_s_p5 + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + %0:sgpr(p5) = G_CONSTANT i32 0 + %1:sgpr(p5) = G_CONSTANT i32 1 + %2:sgpr(p5) = G_CONSTANT i32 -1 + %3:sgpr(p5) = G_CONSTANT i32 -54 + %4:sgpr(p5) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_v_p5 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_v_p5 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; + ; WAVE32-LABEL: name: constant_v_p5 + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + %0:vgpr(p5) = G_CONSTANT i32 0 + %1:vgpr(p5) = G_CONSTANT i32 1 + %2:vgpr(p5) = G_CONSTANT i32 -1 + %3:vgpr(p5) = G_CONSTANT i32 -54 + %4:vgpr(p5) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_s_p6 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + + ; WAVE64-LABEL: name: constant_s_p6 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; + ; WAVE32-LABEL: name: constant_s_p6 + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + %0:sgpr(p6) = G_CONSTANT i32 0 + %1:sgpr(p6) = G_CONSTANT i32 1 + %2:sgpr(p6) = G_CONSTANT i32 -1 + %3:sgpr(p6) = G_CONSTANT i32 -54 + %4:sgpr(p6) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + +--- +name: constant_v_p6 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_v_p6 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; + ; WAVE32-LABEL: name: constant_v_p6 + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + %0:vgpr(p6) = G_CONSTANT i32 0 + %1:vgpr(p6) = G_CONSTANT i32 1 + %2:vgpr(p6) = G_CONSTANT i32 -1 + %3:vgpr(p6) = G_CONSTANT i32 -54 + %4:vgpr(p6) = G_CONSTANT i32 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... + --- name: constant_s_p1 legalized: true Index: llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -311,7 +311,7 @@ ; GFX11-LABEL: tied_operand_test: ; GFX11: ; %bb.0: ; %entry ; GFX11-DAG: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off offset:4 -; GFX11-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7b +; GFX11-DAG: v_dual_mov_b32 [[C:v[0-9]+]], 0x7b ; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[LDRESULT]] offset:10 ; GFX11-DAG: ds_store_b16 v{{[0-9]+}}, [[C]] offset:8 ; GFX11-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -973,21 +973,21 @@ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GCN-NEXT: s_addc_u32 s5, s5, 0 ; GCN-NEXT: v_mov_b32_e32 v0, 4 +; GCN-NEXT: v_mov_b32_e32 v1, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_and_b32 s3, s3, 3 -; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_lshrrev_b16_e64 v2, 1, s2 -; GCN-NEXT: v_lshrrev_b16_e64 v3, 2, s2 -; GCN-NEXT: v_lshrrev_b16_e64 v4, 3, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s2 +; GCN-NEXT: v_lshrrev_b16_e64 v4, 2, s2 +; GCN-NEXT: v_lshrrev_b16_e64 v5, 3, s2 ; GCN-NEXT: v_or_b32_e32 v0, s3, v0 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 -; GCN-NEXT: v_and_b32_e32 v3, 3, v3 -; GCN-NEXT: v_and_b32_e32 v4, 1, v4 -; GCN-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:4 -; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 offset:7 -; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:6 +; GCN-NEXT: v_and_b32_e32 v4, 3, v4 +; GCN-NEXT: v_and_b32_e32 v5, 1, v5 +; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:4 +; GCN-NEXT: buffer_store_byte v5, off, s[4:7], 0 offset:7 +; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 offset:6 ; GCN-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:5 -; GCN-NEXT: v_mov_b32_e32 v1, 1 ; GCN-NEXT: buffer_store_byte v1, v0, s[4:7], 0 offen ; GCN-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 offset:4 ; GCN-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 offset:5 Index: llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll +++ llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll @@ -94,14 +94,14 @@ ; CHECK-LABEL: module_0_kernel_normal_extern_normal: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 ; CHECK-NEXT: s_add_i32 s0, s0, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b32 v2, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b32 v2, v1 ; CHECK-NEXT: s_endpgm store i16 2, ptr addrspace(3) @kernel_normal @@ -129,14 +129,14 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_add_i32 s4, s4, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:2 -; CHECK-NEXT: ds_write_b32 v3, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 +; CHECK-NEXT: ds_write_b32 v3, v1 ; CHECK-NEXT: s_endpgm call void @use_module() store i16 1, ptr addrspace(3) @module_variable @@ -152,14 +152,14 @@ ; CHECK-LABEL: module_0_kernel_overalign_extern_normal: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 ; CHECK-NEXT: s_add_i32 s0, s0, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b32 v2, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b32 v2, v1 ; CHECK-NEXT: s_endpgm store i16 2, ptr addrspace(3) @kernel_overalign @@ -187,14 +187,14 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_add_i32 s4, s4, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:4 -; CHECK-NEXT: ds_write_b32 v3, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 +; CHECK-NEXT: ds_write_b32 v3, v1 ; CHECK-NEXT: s_endpgm call void @use_module() store i16 1, ptr addrspace(3) @module_variable @@ -210,14 +210,14 @@ ; CHECK-LABEL: module_0_kernel_normal_extern_overalign: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 ; CHECK-NEXT: s_add_i32 s0, s0, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b32 v2, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b32 v2, v1 ; CHECK-NEXT: s_endpgm store i16 2, ptr addrspace(3) @kernel_normal @@ -245,14 +245,14 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_add_i32 s4, s4, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:2 -; CHECK-NEXT: ds_write_b32 v3, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 +; CHECK-NEXT: ds_write_b32 v3, v1 ; CHECK-NEXT: s_endpgm call void @use_module() store i16 1, ptr addrspace(3) @module_variable @@ -268,14 +268,14 @@ ; CHECK-LABEL: module_0_kernel_overalign_extern_overalign: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 ; CHECK-NEXT: s_add_i32 s0, s0, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b32 v2, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b32 v2, v1 ; CHECK-NEXT: s_endpgm store i16 2, ptr addrspace(3) @kernel_overalign @@ -303,14 +303,14 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_add_i32 s4, s4, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:4 -; CHECK-NEXT: ds_write_b32 v3, v0 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 +; CHECK-NEXT: ds_write_b32 v3, v1 ; CHECK-NEXT: s_endpgm call void @use_module() store i16 1, ptr addrspace(3) @module_variable @@ -342,12 +342,12 @@ ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] ; CHECK-NEXT: s_mov_b32 s15, 0 -; CHECK-NEXT: ds_write_b16 v0, v1 +; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CHECK-NEXT: s_endpgm @@ -377,13 +377,13 @@ ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: s_mov_b32 s15, 4 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:2 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: s_endpgm @@ -409,12 +409,12 @@ ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, use_extern_normal@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_normal@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] ; CHECK-NEXT: s_mov_b32 s15, 2 -; CHECK-NEXT: ds_write_b16 v0, v1 +; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CHECK-NEXT: s_endpgm @@ -444,13 +444,13 @@ ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: s_mov_b32 s15, 6 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:4 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: s_endpgm @@ -476,12 +476,12 @@ ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] ; CHECK-NEXT: s_mov_b32 s15, 1 -; CHECK-NEXT: ds_write_b16 v0, v1 +; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CHECK-NEXT: s_endpgm @@ -511,13 +511,13 @@ ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: s_mov_b32 s15, 5 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:2 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: s_endpgm @@ -543,12 +543,12 @@ ; CHECK-NEXT: s_getpc_b64 s[6:7] ; CHECK-NEXT: s_add_u32 s6, s6, use_extern_overalign@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s7, s7, use_extern_overalign@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] ; CHECK-NEXT: s_mov_b32 s15, 3 -; CHECK-NEXT: ds_write_b16 v0, v1 +; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CHECK-NEXT: s_endpgm @@ -578,13 +578,13 @@ ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; CHECK-NEXT: v_mov_b32_e32 v1, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: s_mov_b32 s15, 7 -; CHECK-NEXT: ds_write_b16 v0, v1 -; CHECK-NEXT: ds_write_b16 v0, v2 offset:4 +; CHECK-NEXT: ds_write_b16 v1, v0 +; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll +++ llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll @@ -18,18 +18,18 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 { ; GCN-LABEL: test: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: v_mov_b32_e32 v1, 2 -; GCN-NEXT: ds_write_b8 v0, v1 -; GCN-NEXT: ds_read_u8 v2, v0 offset:2 -; GCN-NEXT: ds_read_u16 v3, v0 +; GCN-NEXT: v_mov_b32_e32 v0, 2 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: ds_write_b8 v1, v0 +; GCN-NEXT: ds_read_u8 v2, v1 offset:2 +; GCN-NEXT: ds_read_u16 v3, v1 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: ds_write_b8 v0, v2 offset:6 -; GCN-NEXT: ds_write_b16 v0, v3 offset:4 -; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD -; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] -; GCN-NEXT: global_store_byte v0, v1, s[0:1] +; GCN-NEXT: ds_write_b8 v1, v2 offset:6 +; GCN-NEXT: ds_write_b16 v1, v3 offset:4 +; GCN-NEXT: v_cmp_eq_u16_sdwa s[2:3], v3, v0 src0_sel:BYTE_0 src1_sel:DWORD +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; GCN-NEXT: global_store_byte v1, v0, s[0:1] ; GCN-NEXT: s_endpgm ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: Index: llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll +++ llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll @@ -1,10 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-OPT %s -; RUN: llc -march=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-NOOPT %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}scalar_to_vector_i16: -; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 42 -; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]] -; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 42 +; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 42 ; GCN: buffer_store_short [[V]], define void @scalar_to_vector_i16() { %tmp = load <2 x i16>, ptr addrspace(5) undef @@ -14,9 +12,7 @@ } ; GCN-LABEL: {{^}}scalar_to_vector_f16: -; GCN-NOOPT: s_mov_b32 [[S:s[0-9]+]], 0x3c00 -; GCN-NOOPT: v_mov_b32_e32 [[V:v[0-9]+]], [[S]] -; GCN-OPT: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00 +; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00 ; GCN: buffer_store_short [[V]], define void @scalar_to_vector_f16() { %tmp = load <2 x half>, ptr addrspace(5) undef Index: llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll +++ llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll @@ -25,8 +25,7 @@ ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4 -; CHECK-NEXT: s_mov_b32 s4, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, s4 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: ds_write_b8 v1, v2 ; CHECK-NEXT: s_mov_b64 s[4:5], exec ; CHECK-NEXT: v_writelane_b32 v0, s4, 0