Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -478,14 +478,15 @@ return Legalized; } case TargetOpcode::G_LOAD: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - const auto &MMO = **MI.memoperands_begin(); unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); + int NumParts = SizeOp0 / NarrowSize; + unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); + unsigned LeftoverBits = DstTy.getSizeInBits() - HandledSize; + + if (DstTy.isVector() && LeftoverBits != 0) + return UnableToLegalize; if (8 * MMO.getSize() != DstTy.getSizeInBits()) { unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); @@ -502,13 +503,12 @@ MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); unsigned SrcReg = 0; unsigned Adjustment = i * NarrowSize / 8; unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); @@ -521,15 +521,47 @@ MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, Adjustment); - MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); + MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO); - DstRegs.push_back(DstReg); + DstRegs.push_back(PartDstReg); } + unsigned MergeResultReg = LeftoverBits == 0 ? DstReg : + MRI.createGenericVirtualRegister(LLT::scalar(HandledSize)); + + // For the leftover piece, still create the merge and insert it. + // TODO: Would it be better to directly insert the intermediate pieces? 
if (DstTy.isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); + MIRBuilder.buildBuildVector(MergeResultReg, DstRegs); else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMerge(MergeResultReg, DstRegs); + + if (LeftoverBits == 0) { + MI.eraseFromParent(); + return Legalized; + } + + unsigned ImpDefReg = MRI.createGenericVirtualRegister(DstTy); + unsigned Insert0Reg = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildUndef(ImpDefReg); + MIRBuilder.buildInsert(Insert0Reg, ImpDefReg, MergeResultReg, 0); + + unsigned PartDstReg + = MRI.createGenericVirtualRegister(LLT::scalar(LeftoverBits)); + + unsigned Offset = HandledSize / 8; + unsigned Alignment = MinAlign(MMO.getAlignment(), Offset); + MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( + MMO.getPointerInfo().getWithOffset(Offset), MMO.getFlags(), + LeftoverBits / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), + MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); + + unsigned SrcReg = 0; + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Offset); + MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO); + MIRBuilder.buildInsert(DstReg, Insert0Reg, PartDstReg, HandledSize); + MI.eraseFromParent(); return Legalized; } Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -global-isel-abort=0 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s --- name: test_load_global_i32 @@ -7,10 +8,14 @@ 
bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; SI-LABEL: name: test_load_global_i32 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](s32) + ; VI-LABEL: name: test_load_global_i32 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1) @@ -23,10 +28,14 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; SI-LABEL: name: test_load_global_i64 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](s32) + ; VI-LABEL: name: test_load_global_i64 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1) @@ -39,10 +48,14 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_p1 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; SI-LABEL: name: test_load_global_p1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; VI-LABEL: name: 
test_load_global_p1 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1) @@ -55,27 +68,34 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_p4 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; SI-LABEL: name: test_load_global_p4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; VI-LABEL: name: test_load_global_p4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... 
- --- name: test_load_global_p3 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_p3 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](p3) + ; SI-LABEL: name: test_load_global_p3 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](p3) + ; VI-LABEL: name: test_load_global_p3 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: $vgpr0 = COPY [[LOAD]](p3) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1) @@ -88,10 +108,14 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; SI-LABEL: name: test_load_global_v2s32 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI-LABEL: name: test_load_global_v2s32 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1) @@ -105,10 +129,14 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; SI-LABEL: name: test_load_global_v2s16 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(<2 x 
s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; VI-LABEL: name: test_load_global_v2s16 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1) $vgpr0 = COPY %1 @@ -120,10 +148,14 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_load_global_v3i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; SI-LABEL: name: test_load_global_v3i32 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; VI-LABEL: name: test_load_global_v3i32 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) @@ -136,11 +168,16 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-LABEL: name: 
test_ext_load_global_s64_from_1_align1 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4) @@ -153,11 +190,16 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4) @@ -170,11 +212,16 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) 
= G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4) @@ -187,13 +234,135 @@ bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128) + ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128) + ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... 
+ +--- +name: test_load_global_s96_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_s96_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[COPY1]](s64), 0 + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1) + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + ; VI-LABEL: name: test_load_global_s96_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s96) = G_LOAD %0 :: (load 12, addrspace 1, align 4) + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
+ +--- +name: test_load_global_s160_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_s160_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) + ; SI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[MV]](s128), 0 + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1) + ; SI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD2]](s32), 128 + ; SI: S_NOP 0, implicit [[INSERT1]](s160) + ; VI-LABEL: name: test_load_global_s160_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) + ; VI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[MV]](s128), 0 + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1) + ; VI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD2]](s32), 128 + ; VI: S_NOP 0, implicit [[INSERT1]](s160) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s160) = G_LOAD %0 :: (load 20, addrspace 1, align 4) + S_NOP 0, implicit %1 +... 
+ +--- +name: test_load_global_s224_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_s224_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) + ; SI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[MV]](s192), 0 + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1) + ; SI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD3]](s32), 192 + ; SI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT1]](s224), 0 + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) + ; VI-LABEL: name: test_load_global_s224_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES 
[[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) + ; VI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[MV]](s192), 0 + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1) + ; VI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD3]](s32), 192 + ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT1]](s224), 0 + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s224) = G_LOAD %0 :: (load 28, addrspace 1, align 4) + + %2:_(s256) = G_IMPLICIT_DEF + %3:_(s256) = G_INSERT %2, %1, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 + +...