diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -11,7 +11,7 @@
 >;

 def VGPRRegBank : RegisterBank<"VGPR",
-  [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
+  [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
 >;

 // It is helpful to distinguish conditions from ordinary SGPRs.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -32,6 +32,7 @@
   const GCNSubtarget &ST;
   bool SpillSGPRToVGPR;
   bool isWave32;
+  BitVector RegPressureIgnoredUnits;

   void reserveRegisterTuples(BitVector &, unsigned Reg) const;

@@ -269,7 +270,13 @@
   // \returns number of 32 bit registers covered by a \p LM
   static unsigned getNumCoveredRegs(LaneBitmask LM) {
-    return LM.getNumLanes();
+    // The assumption is that every lo16 subreg is an even bit and every hi16
+    // is an adjacent odd bit or vice versa.
+    uint64_t Mask = LM.getAsInteger();
+    uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
+    Mask = (Even >> 1) | Mask;
+    uint64_t Odd = Mask & 0x5555555555555555ULL;
+    return countPopulation(Odd);
   }

   // \returns a DWORD offset of a \p SubReg
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -40,7 +40,20 @@
 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
   AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
-  SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {}
+  SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
+
+  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
+         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
+         (getSubRegIndexLaneMask(AMDGPU::lo16) |
+          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
+           getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
+         "getNumCoveredRegs() will not work with generated subreg masks!");
+
+  RegPressureIgnoredUnits.resize(getNumRegUnits());
+  RegPressureIgnoredUnits.set(*MCRegUnitIterator(AMDGPU::M0, this));
+  for (auto Reg : AMDGPU::VGPR_HI16RegClass)
+    RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
+}

 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                            unsigned Reg) const {
@@ -1777,6 +1790,8 @@
   default:
     return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
   case AMDGPU::VGPR_32RegClassID:
+  case AMDGPU::VGPR_LO16RegClassID:
+  case AMDGPU::VGPR_HI16RegClassID:
     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
   case AMDGPU::SGPR_32RegClassID:
     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
@@ -1800,8 +1815,9 @@
 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
   static const int Empty[] = { -1 };

-  if (hasRegUnit(AMDGPU::M0, RegUnit))
+  if (RegPressureIgnoredUnits[RegUnit])
     return Empty;
+
   return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
 }
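The even/odd folding in getNumCoveredRegs() above is terse, so here is a standalone sketch of the same computation in plain C++ (the numCoveredRegs name and __builtin_popcountll are stand-ins for LLVM's LaneBitmask/countPopulation machinery, not code from the patch). It assumes exactly the lane layout the new constructor assert enforces: lo16 lanes on even bits, hi16 lanes on the adjacent odd bits, so sub0 == 0x3.

#include <cassert>
#include <cstdint>

static unsigned numCoveredRegs(uint64_t Mask) {
  uint64_t Hi = Mask & 0xAAAAAAAAAAAAAAAAULL; // hi16 lanes sit on odd bits
  Mask |= Hi >> 1;                            // fold each hi16 lane onto its lo16 bit
  uint64_t Lo = Mask & 0x5555555555555555ULL; // one even bit per touched 32-bit reg
  return (unsigned)__builtin_popcountll(Lo);
}

int main() {
  assert(numCoveredRegs(0x1) == 1); // v0.l alone still occupies a full VGPR
  assert(numCoveredRegs(0x3) == 1); // whole sub0
  assert(numCoveredRegs(0xC) == 1); // whole sub1 (the L...0C interval mask in the tests)
  assert(numCoveredRegs(0xF) == 2); // sub0_sub1 (the 0x...0F livein mask in the tests)
}

This doubling of lanes per 32-bit register is also why the checked-in lane masks in the tests below change, e.g. L0000000000000002 becoming L000000000000000C and the 0x...03 livein mask becoming 0x...0F.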
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,10 +10,44 @@
 // Subregister declarations
 //===----------------------------------------------------------------------===//

+class Indexes<int N> {
+  list<int> all = [0, 1, 2, 3, 4, 5, 6, 7,
+                   8, 9, 10, 11, 12, 13, 14, 15,
+                   16, 17, 18, 19, 20, 21, 22, 23,
+                   24, 25, 26, 27, 28, 29, 30, 31];
+
+  // Returns list of indexes [0..N)
+  list<int> slice =
+    !foldl([]<int>, all, acc, cur,
+           !listconcat(acc, !if(!lt(cur, N), [cur], [])));
+}
+
 let Namespace = "AMDGPU" in {
+
+def lo16 : SubRegIndex<16, 0>;
+def hi16 : SubRegIndex<16, 16>;
+
 foreach Index = 0-31 in {
   def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
 }
+
+foreach Index = 1-31 in {
+  def sub#Index#_lo16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), lo16>;
+  def sub#Index#_hi16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), hi16>;
+}
+
+foreach Size = {2-5,8,16} in {
+  foreach Index = Indexes<!add(33, !mul(Size, -1))>.slice in {
+    def !foldl("", Indexes<Size>.slice, acc, cur,
+               !strconcat(acc#!if(!eq(acc,""),"","_"), "sub"#!add(cur, Index))) :
+      SubRegIndex<!mul(Size, 32), !shl(Index, 5)> {
+      let CoveringSubRegIndices =
+        !foldl([]<SubRegIndex>, Indexes<Size>.slice, acc, cur,
+               !listconcat(acc, [!cast<SubRegIndex>(sub#!add(cur, Index))]));
+    }
+  }
+}
+
 }

 //===----------------------------------------------------------------------===//
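The nested !foldl loops are dense. As a rough rendering of what they expand to (hypothetical C++, not part of the patch), each tuple width Size and start offset Index produces a composite index such as sub0_sub1, whose CoveringSubRegIndices are the constituent 32-bit indexes:

#include <cstdio>
#include <string>

int main() {
  for (int Size : {2, 3, 4, 5, 8, 16}) {               // foreach Size = {2-5,8,16}
    for (int Index = 0; Index + Size <= 32; ++Index) { // Indexes<33 - Size>.slice
      std::string Name;                                // the !strconcat accumulator
      for (int Cur = 0; Cur < Size; ++Cur)             // Indexes<Size>.slice
        Name += (Name.empty() ? "sub" : "_sub") + std::to_string(Cur + Index);
      if (Index == 0) // print one sample per width
        std::printf("def %s : SubRegIndex<%d, %d>\n", Name.c_str(),
                    Size * 32, Index * 32);
    }
  }
}

For Size = 2, Index = 0 this prints "def sub0_sub1 : SubRegIndex<64, 0>", matching the SubRegIndex<!mul(Size, 32), !shl(Index, 5)> definition above.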
@@ -92,6 +126,16 @@
   let HWEncoding = regIdx;
 }

+class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx = 0> :
+  RegisterWithSubRegs<n, subregs> {
+  let Namespace = "AMDGPU";
+
+  // This is not yet the complete register encoding. An additional
+  // bit is set for VGPRs.
+  let HWEncoding = regIdx;
+  let CoveredBySubRegs = 1;
+}
+
 // Special Registers
 def VCC_LO : SIReg<"vcc_lo", 106>;
 def VCC_HI : SIReg<"vcc_hi", 107>;
@@ -221,10 +265,29 @@
 // The ratio of index/allocation_granularity is taken as the cost value.
 // Considered the allocation granularity as 4 here.
 let CostPerUse=!if(!gt(Index, 31), !srl(Index, 2), 0) in {
-  def VGPR#Index :
-    SIReg <"v"#Index, Index>,
-    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
+
+  // There is no special encoding for the low 16 bit subreg; this is not a
+  // real register but rather an operand for instructions preserving the high
+  // 16 bits of the result or reading just the low 16 bits of a 32 bit VGPR.
+  // It is encoded as the corresponding 32 bit register.
+  def VGPR#Index#_LO16 : SIReg <"v"#Index#".l", Index>,
+    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
+    let HWEncoding{8} = 1;
+  }
+  // Likewise there is no special encoding for the high 16 bit subreg; this is
+  // not a real register but rather an operand for instructions preserving the
+  // low 16 bits of the result or reading just the high 16 bits of a 32 bit
+  // VGPR. It is encoded as the corresponding 32 bit register.
+  def VGPR#Index#_HI16 : SIReg <"v"#Index#".h", Index>,
+    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
+    let HWEncoding{8} = 1;
+  }
+  def VGPR#Index : SIRegWithSubRegs <"v"#Index,
+    [!cast<Register>("VGPR"#Index#"_LO16"), !cast<Register>("VGPR"#Index#"_HI16")],
+    Index>,
+    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
     let HWEncoding{8} = 1;
+    let SubRegIndices = [lo16, hi16];
   }
 }
 }
@@ -386,6 +449,19 @@
 def Reg16Types : RegisterTypes<[i16, f16]>;
 def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;

+def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+                              (add (sequence "VGPR%u_LO16", 0, 255))> {
+  let AllocationPriority = 1;
+  let Size = 16;
+  let GeneratePressureSet = 0;
+}
+
+def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+                              (add (sequence "VGPR%u_HI16", 0, 255))> {
+  let AllocationPriority = 1;
+  let Size = 16;
+  let GeneratePressureSet = 0;
+}

 // VGPR 32-bit registers
 // i16/f16 only on VI+
@@ -393,6 +469,7 @@
   (add (sequence "VGPR%u", 0, 255))> {
   let AllocationPriority = 1;
   let Size = 32;
+  let Weight = 1;
 }

 // VGPR 64-bit registers
@@ -634,6 +711,7 @@
   // Requires 2 v_mov_b32 to copy
   let CopyCost = 2;
   let AllocationPriority = 2;
+  let Weight = 2;
 }

 def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
@@ -642,6 +720,7 @@
   // Requires 3 v_mov_b32 to copy
   let CopyCost = 3;
   let AllocationPriority = 3;
+  let Weight = 3;
 }

 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64, i128], 32,
@@ -651,6 +730,7 @@
   // Requires 4 v_mov_b32 to copy
   let CopyCost = 4;
   let AllocationPriority = 4;
+  let Weight = 4;
 }

 def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
@@ -660,6 +740,7 @@
   // Requires 5 v_mov_b32 to copy
   let CopyCost = 5;
   let AllocationPriority = 5;
+  let Weight = 5;
 }

 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
@@ -667,6 +748,7 @@
   let Size = 256;
   let CopyCost = 8;
   let AllocationPriority = 6;
+  let Weight = 8;
 }

 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
@@ -674,6 +756,7 @@
   let Size = 512;
   let CopyCost = 16;
   let AllocationPriority = 7;
+  let Weight = 16;
 }

 def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
@@ -681,6 +764,7 @@
   let Size = 1024;
   let CopyCost = 32;
   let AllocationPriority = 8;
+  let Weight = 32;
 }

 def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
@@ -16,14 +16,14 @@
     ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
    ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
    ; MOVREL: $m0 = COPY [[COPY2]]
-    ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 1, implicit $m0
+    ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0
    ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V2_]]
    ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v2s32
    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
    ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
    ; GPRIDX: $m0 = COPY [[COPY2]]
-    ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 1, implicit $m0
+    ; GPRIDX:
[[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V2_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -46,14 +46,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sreg_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sreg_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v3s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sreg_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sreg_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V3_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr3 @@ -76,14 +76,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr3 @@ -106,14 +106,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sreg_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sreg_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v5s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sreg_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sreg_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V5_]] %0:sgpr(<5 x s32>) = 
COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s32) = COPY $sgpr5 @@ -136,14 +136,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -166,14 +166,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V16_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v16s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V16_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr16 @@ -196,14 +196,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sreg_1024 = 
S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -226,14 +226,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 35, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v2s64 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 35, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V2_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -256,14 +256,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 35, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v4s64 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 35, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V4_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr8_sgpr9 @@ -286,14 +286,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 35, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v8s64 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 35, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sreg_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V8_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s64) = COPY $sgpr16_sgpr17 @@ -316,14 +316,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 35, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V16_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 35, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sreg_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V16_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s64) = COPY $sgpr40_sgpr41 @@ -346,14 +346,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -377,14 +377,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 @@ -408,14 +408,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -439,14 +439,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit 
[[V_INDIRECT_REG_WRITE_B32_V5_]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -470,14 +470,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -501,14 +501,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 2, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 10, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 2, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 10, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -536,7 +536,7 @@ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -545,7 +545,7 @@ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; 
GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -571,14 +571,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 2, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 10, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 2, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 10, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -605,7 +605,7 @@ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -614,7 +614,7 @@ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -641,14 +641,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; MOVREL: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0 + ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] ; GPRIDX-LABEL: name: 
insert_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GPRIDX: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0 + ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -671,14 +671,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; MOVREL: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 8, implicit-def $m0, implicit $m0 - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec + ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir --- a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir @@ -5,7 +5,7 @@ # CHECK-NEXT: - basic block: %bb.0 # CHECK-NEXT: - instruction: 48B dead undef %2.sub0:vreg_128 = COPY %0.sub0:vreg_128 # CHECK-NEXT: - operand 1: %0.sub0:vreg_128 -# CHECK-NEXT: - interval: %0 [16r,48r:0) 0@16r L0000000000000002 [16r,32r:0) 0@16r weight:0.000000e+00 +# CHECK-NEXT: - interval: %0 [16r,48r:0) 0@16r L000000000000000C [16r,32r:0) 0@16r weight:0.000000e+00 # This used to assert with: !SR.empty() && "At least one value should be defined by this mask" diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll --- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll @@ -1,19 +1,19 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s ; Make sure the expected regmask is generated for sub/superregisters. 
-; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} +; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} define void @csr() #0 { call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0 ret void } -; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} define void @subregs_for_super() #0 { call void asm sideeffect "", "~{v0},~{v1}"() #0 ret void } -; CHECK-DAG: Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 
$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} +; CHECK-DAG: Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}} define void @clobbered_reg_with_sub() #0 { call void asm sideeffect "", "~{v[0:1]}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll @@ -989,4 +989,24 @@ ret <2 x i16> %build1 } +; FIXME: This test should work without copying of v0. +; ds_read_u16_d16_hi preserves low 16 bits of the destination +; and ds_write_b16 only reads low 16 bits. 
+; GCN: s_waitcnt +; GFX900: v_mov_b32_e32 [[COPY:v[0-9]+]], v0 +; GFX900-NEXT: ds_read_u16_d16_hi [[COPY]], v1 +; GFX900-NEXT: ds_write_b16 v1, v0 +; GFX900-NEXT: s_waitcnt +; GFX900-NEXT: v_mov_b32_e32 v0, [[COPY]] +; GFX900-NEXT: s_waitcnt +; GFX900-NEXT: s_setpc_b64 +define <2 x i16> @load_local_hi_v2i16_store_local_lo(i16 %reg, i16 addrspace(3)* %in) #0 { +entry: + %load = load i16, i16 addrspace(3)* %in + %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 + %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 + store volatile i16 %reg, i16 addrspace(3)* %in + ret <2 x i16> %build1 +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir --- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir @@ -9,13 +9,13 @@ ; GCN-LABEL: name: bundle_memops ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec ; GCN: S_NOP 0 - ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr1, implicit undef $vgpr3_vgpr4, implicit $exec { + ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit undef $vgpr3_vgpr4, implicit $exec { ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec ; GCN: } ; GCN: S_NOP 0 ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec - ; GCN: BUNDLE implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr5, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 { + ; GCN: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 { ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, implicit $exec ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec ; GCN: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec @@ -34,7 +34,7 @@ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec ; GCN: S_NOP 0 ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec - ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 { + ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 { ; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec ; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec ; GCN: } @@ -47,7 +47,7 @@ ; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0 ; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0 ; GCN: } - ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 { + ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 { ; 
GCN: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: } @@ -55,7 +55,7 @@ ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: } - ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec { + ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec { ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec ; GCN: } @@ -67,7 +67,7 @@ ; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0 ; GCN: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0 ; GCN: S_NOP 0 - ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit $vgpr0, implicit $exec, implicit $vgpr1 { + ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 { ; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec ; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec ; GCN: } diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir --- a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir @@ -5,7 +5,7 @@ # CHECK-LABEL: bb.0: # CHECK: renamable $sgpr1 = COPY renamable $sgpr2 # CHECK-LABEL: bb.1: -# CHECK: liveins: $sgpr0_sgpr1:0x0000000000000003 +# CHECK: liveins: $sgpr0_sgpr1:0x000000000000000F # CHECK: renamable $vgpr1_vgpr2 = COPY renamable $sgpr0_sgpr1 ---
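A note on the BUNDLE header churn in the two MIR tests above: once $vgpr0 has $vgpr0_lo16 and $vgpr0_hi16 as subregisters, bundle finalization lists those halves as defined alongside the full register. A hedged sketch of that enumeration (collectBundleDefs is a hypothetical helper, though MCSubRegIterator is LLVM's real API; this is not code from the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// Given one register defined inside a bundle, record it together with all of
// its subregisters, the way the BUNDLE headers above now show $vgpr0 next to
// $vgpr0_lo16 and $vgpr0_hi16.
static void collectBundleDefs(const llvm::TargetRegisterInfo &TRI,
                              llvm::MCRegister Reg,
                              llvm::SmallVectorImpl<llvm::MCRegister> &Defs) {
  Defs.push_back(Reg);
  for (llvm::MCSubRegIterator Sub(Reg, &TRI); Sub.isValid(); ++Sub)
    Defs.push_back(*Sub); // e.g. $vgpr0 -> $vgpr0_lo16, $vgpr0_hi16
}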