diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1133,7 +1133,6 @@
   if (LoadSize <= MaxNonSmrdLoadSize)
     return false;
 
-  SmallVector<Register, 16> DefRegs(OpdMapper.getVRegs(0));
   SmallVector<Register, 1> SrcRegs(OpdMapper.getVRegs(1));
 
   // If the pointer is an SGPR, we have nothing to do.
@@ -1171,24 +1170,13 @@
     return false;
 
   // At this point, the legalizer has split the original load into smaller
-  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
-  // that combines the outputs of the lower loads and writes it to DstReg.
-  // The register bank selector has also added the RepairInst which writes to
-  // DstReg as well.
-
-  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
-
-  // Replace the output of the LegalizedInst with a temporary register, since
-  // RepairInst already defines DstReg.
-  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
-  LegalizedInst->getOperand(0).setReg(TmpReg);
-  B.setInsertPt(*RepairInst->getParent(), RepairInst);
-
-  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
-    Register IdxReg = B.buildConstant(LLT::scalar(32), DefIdx).getReg(0);
-    MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank);
-    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
-  }
+  // loads. At the end of lowering, it inserts an instruction that combines the
+  // outputs of the lower loads and writes it to DstReg. The register bank
+  // selector has also added the RepairInst which writes to DstReg as well.
+  // Since the instruction added by the legalizer will handle the concatenation
+  // of the split loads, we can erase the repair instruction which was added by
+  // RegBankSelect.
+  RepairInst->eraseFromParent();
 
   MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
   return true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -90,23 +90,6 @@
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32)
 ...
@@ -124,15 +107,6 @@
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX3]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64)
 ...
@@ -158,39 +132,6 @@
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX7]]
-    ; CHECK: [[IDX8:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8
-    ; CHECK: [[OUT8:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX8]]
-    ; CHECK: [[IDX9:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 9
-    ; CHECK: [[OUT9:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX9]]
-    ; CHECK: [[IDX10:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 10
-    ; CHECK: [[OUT10:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX10]]
-    ; CHECK: [[IDX11:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 11
-    ; CHECK: [[OUT11:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX11]]
-    ; CHECK: [[IDX12:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[OUT12:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX12]]
-    ; CHECK: [[IDX13:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 13
-    ; CHECK: [[OUT13:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX13]]
-    ; CHECK: [[IDX14:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 14
-    ; CHECK: [[OUT14:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX14]]
-    ; CHECK: [[IDX15:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[OUT15:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX15]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32), [[OUT8]](s32), [[OUT9]](s32), [[OUT10]](s32), [[OUT11]](s32), [[OUT12]](s32), [[OUT13]](s32), [[OUT14]](s32), [[OUT15]](s32)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32)
 ...
@@ -214,23 +155,6 @@
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 48, align 64, addrspace 1)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64), [[OUT4]](s64), [[OUT5]](s64), [[OUT6]](s64), [[OUT7]](s64)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v8i64)
 ...
@@ -301,23 +225,6 @@
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s32>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32)
 ...
@@ -336,15 +243,6 @@
     ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
     ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s64>), [[IDX3]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64)
 ...
@@ -369,39 +267,6 @@
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX7]]
-    ; CHECK: [[IDX8:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8
-    ; CHECK: [[OUT8:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX8]]
-    ; CHECK: [[IDX9:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 9
-    ; CHECK: [[OUT9:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX9]]
-    ; CHECK: [[IDX10:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 10
-    ; CHECK: [[OUT10:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX10]]
-    ; CHECK: [[IDX11:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 11
-    ; CHECK: [[OUT11:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX11]]
-    ; CHECK: [[IDX12:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[OUT12:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX12]]
-    ; CHECK: [[IDX13:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 13
-    ; CHECK: [[OUT13:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX13]]
-    ; CHECK: [[IDX14:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 14
-    ; CHECK: [[OUT14:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX14]]
-    ; CHECK: [[IDX15:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[OUT15:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s32>), [[IDX15]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s32), [[OUT1]](s32), [[OUT2]](s32), [[OUT3]](s32), [[OUT4]](s32), [[OUT5]](s32), [[OUT6]](s32), [[OUT7]](s32), [[OUT8]](s32), [[OUT9]](s32), [[OUT10]](s32), [[OUT11]](s32), [[OUT12]](s32), [[OUT13]](s32), [[OUT14]](s32), [[OUT15]](s32)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32)
 ...
@@ -426,23 +291,6 @@
     ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
     ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4)
     ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
-    ; CHECK: [[IDX0:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[OUT0:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX0]]
-    ; CHECK: [[IDX1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK: [[OUT1:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX1]]
-    ; CHECK: [[IDX2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
-    ; CHECK: [[OUT2:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX2]]
-    ; CHECK: [[IDX3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 3
-    ; CHECK: [[OUT3:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX3]]
-    ; CHECK: [[IDX4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; CHECK: [[OUT4:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX4]]
-    ; CHECK: [[IDX5:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
-    ; CHECK: [[OUT5:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX5]]
-    ; CHECK: [[IDX6:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 6
-    ; CHECK: [[OUT6:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX6]]
-    ; CHECK: [[IDX7:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 7
-    ; CHECK: [[OUT7:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s64>), [[IDX7]]
-    ; CHECK: G_BUILD_VECTOR [[OUT0]](s64), [[OUT1]](s64), [[OUT2]](s64), [[OUT3]](s64), [[OUT4]](s64), [[OUT5]](s64), [[OUT6]](s64), [[OUT7]](s64)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64)
 ...
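
For reference, a minimal MIR sketch of the behavior change, not part of the patch; the register names are hypothetical and it is simplified from the first v8i32 test case above. Before this change, RegBankSelect's repair instruction rebuilt the wide load result element by element on top of the concatenation the legalizer had already emitted:

    ; Before: repair chain re-assembles the value the concat already holds.
    %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS %lo(<4 x s32>), %hi(<4 x s32>)
    %idx0:vgpr(s32) = G_CONSTANT i32 0
    %elt0:vgpr(s32) = G_EXTRACT_VECTOR_ELT %load(<8 x s32>), %idx0
    ; ... repeated for elements 1 through 7 ...
    %dst:vgpr(<8 x s32>) = G_BUILD_VECTOR %elt0(s32), %elt1(s32), %elt2(s32), %elt3(s32), %elt4(s32), %elt5(s32), %elt6(s32), %elt7(s32)

After it, the G_CONCAT_VECTORS inserted by the legalizer already defines the full VGPR value, so the repair instruction is erased and the extract/build_vector chain is never created:

    ; After: the legalizer's concat is the final definition.
    %dst:vgpr(<8 x s32>) = G_CONCAT_VECTORS %lo(<4 x s32>), %hi(<4 x s32>)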