diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
--- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -220,77 +220,6 @@
   return &ValMappingsSGPR64OnlyVGPR32[1];
 }
 
-const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
-  /* 256-bit load */    {0, 256, SGPRRegBank},
-  /* 512-bit load */    {0, 512, SGPRRegBank},
-  /* 8 32-bit loads */  {0, 32, VGPRRegBank},   {32, 32, VGPRRegBank},
-                        {64, 32, VGPRRegBank},  {96, 32, VGPRRegBank},
-                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
-                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
-  /* 16 32-bit loads */ {0, 32, VGPRRegBank},   {32, 32, VGPRRegBank},
-                        {64, 32, VGPRRegBank},  {96, 32, VGPRRegBank},
-                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
-                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
-                        {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank},
-                        {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank},
-                        {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank},
-                        {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank},
-  /* 4 64-bit loads */  {0, 64, VGPRRegBank},   {64, 64, VGPRRegBank},
-                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
-  /* 8 64-bit loads */  {0, 64, VGPRRegBank},   {64, 64, VGPRRegBank},
-                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
-                        {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank},
-                        {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank},
-
-  /* FIXME: The generic register bank select does not support complex
-   * break downs where the number of vector elements does not equal the
-   * number of breakdowns.
-   * FIXME: register bank select now tries to handle complex break downs,
-   * but it emits an illegal instruction:
-   *   %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128)
-   */
-  /* 2 128-bit loads */ {0, 128, VGPRRegBank},  {128, 128, VGPRRegBank},
-  /* 4 128-bit loads */ {0, 128, VGPRRegBank},  {128, 128, VGPRRegBank},
-                        {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank}
-};
-
-const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] {
-  /* 256-bit load */    {&LoadSGPROnlyBreakDown[0], 1},
-  /* 512-bit load */    {&LoadSGPROnlyBreakDown[1], 1},
-  /* <8 x i32> load */  {&LoadSGPROnlyBreakDown[2], 8},
-  /* <16 x i32> load */ {&LoadSGPROnlyBreakDown[10], 16},
-  /* <4 x i64> load */  {&LoadSGPROnlyBreakDown[26], 4},
-  /* <8 x i64> load */  {&LoadSGPROnlyBreakDown[30], 8}
-};
-
-const RegisterBankInfo::ValueMapping *
-getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) {
-  unsigned Size = SizeTy.getSizeInBits();
-  if (Size < 256 || BankID == AMDGPU::SGPRRegBankID)
-    return getValueMapping(BankID, Size);
-
-  assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID);
-
-  // Default to using the non-split ValueMappings, we will use these if
-  // the register bank is SGPR or if we don't know how to handle the vector
-  // type.
-  unsigned Idx = Size == 256 ? 0 : 1;
-
-  // We need to split this load if it has a vgpr pointer.
-  if (BankID == AMDGPU::VGPRRegBankID) {
-    if (SizeTy == LLT::vector(8, 32))
-      Idx = 2;
-    else if (SizeTy == LLT::vector(16, 32))
-      Idx = 3;
-    else if (SizeTy == LLT::vector(4, 64))
-      Idx = 4;
-    else if (SizeTy == LLT::vector(8, 64))
-      Idx = 5;
-  }
-
-  return &ValMappingsLoadSGPROnly[Idx];
-}
-
 } // End AMDGPU namespace.
 } // End llvm namespace.
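For readers who have not met these tables: a ValueMapping names a run of NumBreakDowns consecutive PartialMapping entries, and each PartialMapping gives the bit offset, width, and bank of one piece of the value. That is why the <16 x i32> entry above pointed at &LoadSGPROnlyBreakDown[10]: slots 0-1 hold the unsplit SGPR mappings and slots 2-9 the eight 32-bit pieces. The stand-alone sketch below models that indexing; the struct names mirror LLVM's RegisterBankInfo types, but they are simplified stand-ins (plain ints instead of RegisterBank pointers), not the real API.

#include <cassert>

struct PartialMapping {
  unsigned StartIdx; // bit offset of this piece within the original value
  unsigned Length;   // width of this piece in bits
  int Bank;          // 0 = SGPR, 1 = VGPR (stand-in for a RegisterBank*)
};

struct ValueMapping {
  const PartialMapping *BreakDown; // first of NumBreakDowns entries
  unsigned NumBreakDowns;
};

int main() {
  // Rebuild the first slots of LoadSGPROnlyBreakDown: the sixteen 32-bit
  // pieces start at slot 2 + 8 = 10, matching the deleted table.
  PartialMapping Table[26];
  unsigned N = 0;
  Table[N++] = {0, 256, 0};          // 256-bit SGPR load, unsplit
  Table[N++] = {0, 512, 0};          // 512-bit SGPR load, unsplit
  for (unsigned I = 0; I != 8; ++I)  // 8 x 32-bit VGPR pieces
    Table[N++] = {I * 32, 32, 1};
  for (unsigned I = 0; I != 16; ++I) // 16 x 32-bit VGPR pieces
    Table[N++] = {I * 32, 32, 1};

  ValueMapping V8S32  = {&Table[2], 8};
  ValueMapping V16S32 = {&Table[10], 16};

  // The pieces must tile the value: piece I starts where piece I-1 ended,
  // and the last piece ends at the value's total width.
  for (unsigned I = 0; I != V16S32.NumBreakDowns; ++I)
    assert(V16S32.BreakDown[I].StartIdx == I * 32);
  assert(V8S32.BreakDown[7].StartIdx + V8S32.BreakDown[7].Length == 256);
  return 0;
}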
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -541,7 +541,6 @@
   LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
   unsigned PtrSize = PtrTy.getSizeInBits();
   unsigned AS = PtrTy.getAddressSpace();
-  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
 
   if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
        AS != AMDGPUAS::PRIVATE_ADDRESS) &&
@@ -555,9 +554,10 @@
   }
 
   const InstructionMapping &VVMapping = getInstructionMapping(
-    2, 1, getOperandsMapping(
-      {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
-       AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
+      2, 1,
+      getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
     2); // Num Operands
   AltMappings.push_back(&VVMapping);
 
@@ -1102,23 +1102,6 @@
   MI.getOperand(OpIdx).setReg(SGPR);
 }
 
-// When regbankselect repairs registers, it will insert a repair instruction
-// which defines the repaired register. Then it calls applyMapping and expects
-// that the targets will either delete or rewrite the originally wrote to the
-// repaired registers. Beccause of this, we end up in a situation where
-// we have 2 instructions defining the same registers.
-static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
-                                     Register Reg,
-                                     const MachineInstr &MI) {
-  // Is there some way we can assert that there are exactly 2 def instructions?
-  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
-    if (&Other != &MI)
-      return &Other;
-  }
-
-  return nullptr;
-}
-
 bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
                         const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
                                                MachineRegisterInfo &MRI) const {
@@ -1144,11 +1127,6 @@
 
   assert(LoadSize % MaxNonSmrdLoadSize == 0);
 
-  // We want to get the repair instruction now, because it will help us
-  // determine which instruction the legalizer inserts that will also
-  // write to DstReg.
-  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);
-
   // RegBankSelect only emits scalar types, so we need to reset the pointer
   // operand to a pointer type.
   Register BasePtrReg = SrcRegs[0];
@@ -1167,26 +1145,6 @@
   if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
       LegalizerHelper::Legalized)
     return false;
 
-  // At this point, the legalizer has split the original load into smaller
-  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
-  // that combines the outputs of the lower loads and writes it to DstReg.
-  // The register bank selector has also added the RepairInst which writes to
-  // DstReg as well.
-
-  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
-
-  // Replace the output of the LegalizedInst with a temporary register, since
-  // RepairInst already defines DstReg.
-  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
-  LegalizedInst->getOperand(0).setReg(TmpReg);
-  B.setInsertPt(*RepairInst->getParent(), RepairInst);
-
-  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
-    Register IdxReg = B.buildConstant(LLT::scalar(32), DefIdx).getReg(0);
-    MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank);
-    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
-  }
-
   MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
   return true;
 }
@@ -2972,7 +2930,6 @@
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   SmallVector<const ValueMapping*, 2> OpdsMapping(2);
   unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
-  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
   Register PtrReg = MI.getOperand(1).getReg();
   LLT PtrTy = MRI.getType(PtrReg);
   unsigned AS = PtrTy.getAddressSpace();
@@ -2998,11 +2955,9 @@
         AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
 
       PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
-      ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID,
-                                                       LoadTy);
     }
   } else {
-    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
+    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -89,24 +89,7 @@
     ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32)
+    ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32)
 ...
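The new CHECK lines encode the split arithmetic directly: a wide VGPR load becomes 128-bit pieces at 16-byte offsets, reassembled by a single G_CONCAT_VECTORS, with no per-element extract/rebuild traffic. The sketch below models only that offset arithmetic, not LLVM's LegalizerHelper; the 128-bit piece size is an assumption read off the 16-byte steps in the checks, and matches the MaxNonSmrdLoadSize name the assert in applyMappingWideLoad refers to.

#include <cstdio>
#include <initializer_list>

int main() {
  const unsigned MaxNonSmrdLoadSize = 128; // assumed widest VGPR load piece
  for (unsigned LoadSizeBits : {256u, 512u}) { // e.g. <8 x s32>, <16 x s32>
    unsigned NumPieces = LoadSizeBits / MaxNonSmrdLoadSize;
    for (unsigned I = 0; I != NumPieces; ++I) {
      // Matches the G_CONSTANT/G_PTR_ADD/G_LOAD triples in the checks:
      // byte offsets 0,16 for 256-bit loads; 0,16,32,48 for 512-bit ones.
      std::printf("%u-bit load, piece %u: load 16 bytes at +%u\n",
                  LoadSizeBits, I, I * (MaxNonSmrdLoadSize / 8));
    }
  }
  return 0;
}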
@@ -123,16 +106,7 @@
     ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX3]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64)
+    ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64)
 ...
@@ -157,40 +131,7 @@
     ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX7]]
-    ; CHECK: [[IDX8:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8
-    ; CHECK: [[OUT8:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX8]]
-    ; CHECK: [[IDX9:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 9
-    ; CHECK: [[OUT9:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX9]]
-    ; CHECK: [[IDX10:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 10
-    ; CHECK: [[OUT10:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX10]]
-    ; CHECK: [[IDX11:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 11
-    ; CHECK: [[OUT11:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX11]]
-    ; CHECK: [[IDX12:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[OUT12:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX12]]
-    ; CHECK: [[IDX13:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 13
-    ; CHECK: [[OUT13:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX13]]
-    ; CHECK: [[IDX14:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 14
-    ; CHECK: [[OUT14:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX14]]
-    ; CHECK: [[IDX15:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[OUT15:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX15]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32), [[OUT8]](s32), [[OUT9]](s32), [[OUT10]](s32), [[OUT11]](s32), [[OUT12]](s32), [[OUT13]](s32), [[OUT14]](s32), [[OUT15]](s32)
+    ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32)
 ...
@@ -213,24 +154,7 @@
     ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 48, align 64, addrspace 1)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64), [[OUT4]](s64), [[OUT5]](s64), [[OUT6]](s64), [[OUT7]](s64)
+    ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v8i64)
 ...
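Why the single G_CONCAT_VECTORS the updated tests expect is legal, while the one in the FIXME deleted from AMDGPUGenRegisterBankInfo.def (G_CONCAT_VECTORS of two vgpr(s128) values into a <8 x s32>) was not: G_CONCAT_VECTORS requires every source to be a vector with the result's element type, with element counts summing to the result's. The partial loads above keep the element type in each piece (<4 x s32> or <2 x s64>). Below is a small model of that well-formedness rule, with stand-in types rather than LLVM's LLT; NumElts == 0 encodes a scalar here.

#include <cassert>

struct VecTy {
  unsigned NumElts; // 0 means scalar
  unsigned EltBits;
};

// G_CONCAT_VECTORS is well-formed only if every source is a vector with
// the result's element type, and the element counts sum to the result's.
bool concatIsLegal(VecTy Result, const VecTy *Srcs, unsigned NumSrcs) {
  unsigned Elts = 0;
  for (unsigned I = 0; I != NumSrcs; ++I) {
    if (Srcs[I].NumElts == 0 || Srcs[I].EltBits != Result.EltBits)
      return false; // scalar pieces (like s128) fail here
    Elts += Srcs[I].NumElts;
  }
  return Elts == Result.NumElts;
}

int main() {
  VecTy V8S32 = {8, 32};
  VecTy Quads[2]   = {{4, 32}, {4, 32}};   // two <4 x s32> loads: legal
  VecTy Scalars[2] = {{0, 128}, {0, 128}}; // two s128 pieces: illegal
  assert(concatIsLegal(V8S32, Quads, 2));
  assert(!concatIsLegal(V8S32, Scalars, 2));
  return 0;
}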
@@ -300,24 +224,7 @@
     ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32)
+    ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32)
 ...
@@ -335,16 +242,7 @@
     ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX3]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64)
+    ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64)
 ...
@@ -368,40 +266,7 @@
     ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX7]]
-    ; CHECK: [[IDX8:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8
-    ; CHECK: [[OUT8:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX8]]
-    ; CHECK: [[IDX9:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 9
-    ; CHECK: [[OUT9:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX9]]
-    ; CHECK: [[IDX10:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 10
-    ; CHECK: [[OUT10:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX10]]
-    ; CHECK: [[IDX11:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 11
-    ; CHECK: [[OUT11:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX11]]
-    ; CHECK: [[IDX12:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[OUT12:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX12]]
-    ; CHECK: [[IDX13:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 13
-    ; CHECK: [[OUT13:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX13]]
-    ; CHECK: [[IDX14:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 14
-    ; CHECK: [[OUT14:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX14]]
-    ; CHECK: [[IDX15:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[OUT15:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX15]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32), [[OUT8]](s32), [[OUT9]](s32), [[OUT10]](s32), [[OUT11]](s32), [[OUT12]](s32), [[OUT13]](s32), [[OUT14]](s32), [[OUT15]](s32)
+    ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32)
 ...
@@ -425,24 +290,7 @@
     ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4)
-    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64), [[OUT4]](s64), [[OUT5]](s64), [[OUT6]](s64), [[OUT7]](s64)
+    ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64)
 ...