Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -126,6 +126,25 @@
   void extractParts(unsigned Reg, LLT Ty, int NumParts,
                     SmallVectorImpl<unsigned> &VRegs);
 
+  /// Version which handles irregular splits.
+  bool extractParts(unsigned Reg, LLT RegTy, LLT MainTy,
+                    LLT &LeftoverTy,
+                    SmallVectorImpl<unsigned> &VRegs,
+                    SmallVectorImpl<unsigned> &LeftoverVRegs);
+
+  /// Helper function to build a wide generic register \p DstReg of type \p
+  /// ResultTy from smaller parts. This will produce a G_MERGE_VALUES,
+  /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
+  /// for the types.
+  ///
+  /// \p PartRegs must be registers of type \p PartTy.
+  ///
+  /// If \p ResultTy does not evenly break into \p PartTy sized pieces, the
+  /// remainder must be specified with \p LeftoverRegs of type \p LeftoverTy.
+  void insertParts(unsigned DstReg, LLT ResultTy,
+                   LLT PartTy, ArrayRef<unsigned> PartRegs,
+                   LLT LeftoverTy = LLT(), ArrayRef<unsigned> LeftoverRegs = {});
+
   LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
                                                 unsigned TypeIdx, LLT NarrowTy);
 
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -86,6 +86,91 @@
     MIRBuilder.buildUnmerge(VRegs, Reg);
 }
 
+bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
+                                   LLT MainTy, LLT &LeftoverTy,
+                                   SmallVectorImpl<unsigned> &VRegs,
+                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+
+  unsigned RegSize = RegTy.getSizeInBits();
+  unsigned MainSize = MainTy.getSizeInBits();
+  unsigned NumParts = RegSize / MainSize;
+  unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+  // Use an unmerge when possible.
+  if (LeftoverSize == 0) {
+    for (unsigned I = 0; I < NumParts; ++I)
+      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+    MIRBuilder.buildUnmerge(VRegs, Reg);
+    return true;
+  }
+
+  if (MainTy.isVector()) {
+    unsigned EltSize = MainTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return false;
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  // For irregular sizes, extract the individual parts.
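+  // (Worked example for illustration, not part of the original comment:
+  // with RegTy = <3 x s32> and MainTy = <2 x s32>, this path emits one
+  // <2 x s32> G_EXTRACT at bit offset 0 and one s32 G_EXTRACT at offset 64.)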
+  for (unsigned I = 0; I != NumParts; ++I) {
+    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
+    VRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+  }
+
+  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+       Offset += LeftoverSize) {
+    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+    LeftoverRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, Offset);
+  }
+
+  return true;
+}
+
+void LegalizerHelper::insertParts(unsigned DstReg,
+                                  LLT ResultTy, LLT PartTy,
+                                  ArrayRef<unsigned> PartRegs,
+                                  LLT LeftoverTy,
+                                  ArrayRef<unsigned> LeftoverRegs) {
+  if (!LeftoverTy.isValid()) {
+    assert(LeftoverRegs.empty());
+
+    if (PartTy.isVector())
+      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
+    else
+      MIRBuilder.buildBuildVector(DstReg, PartRegs);
+    return;
+  }
+
+  unsigned PartSize = PartTy.getSizeInBits();
+  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+
+  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
+  MIRBuilder.buildUndef(CurResultReg);
+
+  unsigned Offset = 0;
+  for (unsigned PartReg : PartRegs) {
+    unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
+    CurResultReg = NewResultReg;
+    Offset += PartSize;
+  }
+
+  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
+    // Use the original output register for the final insert to avoid a copy.
+    unsigned NewResultReg = (I + 1 == E) ?
+      DstReg : MRI.createGenericVirtualRegister(ResultTy);
+
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
+    CurResultReg = NewResultReg;
+    Offset += LeftoverPartSize;
+  }
+}
+
 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   switch (Opcode) {
   case TargetOpcode::G_SDIV:
@@ -1696,6 +1781,36 @@
   return Legalized;
 }
 
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
+/// with any leftover piece as type \p LeftoverTy.
+///
+/// Returns -1 if the breakdown is not satisfiable.
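+///
+/// (Worked example for illustration: OrigTy = <3 x s32> with NarrowTy =
+/// <2 x s32> returns NumParts = 1 and sets LeftoverTy = s32; OrigTy = s64
+/// with NarrowTy = s32 returns NumParts = 2 and leaves LeftoverTy unset.)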
+static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+
+  unsigned Size = OrigTy.getSizeInBits();
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  unsigned NumParts = Size / NarrowSize;
+  unsigned LeftoverSize = Size - NumParts * NarrowSize;
+  assert(Size > NarrowSize);
+
+  if (LeftoverSize == 0)
+    return NumParts;
+
+  if (NarrowTy.isVector()) {
+    unsigned EltSize = OrigTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return -1;
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  return NumParts;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx,
                                               LLT NarrowTy) {
@@ -1714,42 +1829,68 @@
   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
   unsigned ValReg = MI.getOperand(0).getReg();
   unsigned AddrReg = MI.getOperand(1).getReg();
-  unsigned NarrowSize = NarrowTy.getSizeInBits();
-  unsigned Size = MRI.getType(ValReg).getSizeInBits();
-  unsigned NumParts = Size / NarrowSize;
-  if (NumParts * NarrowSize != Size)
+  LLT ValTy = MRI.getType(ValReg);
+
+  int NumParts = -1;
+  LLT LeftoverTy;
+  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
+  if (IsLoad) {
+    NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+  } else {
+    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+                     NarrowLeftoverRegs))
+      NumParts = NarrowRegs.size();
+  }
+
+  if (NumParts == -1)
     return UnableToLegalize;
 
-  SmallVector<unsigned, 8> NarrowRegs;
-  if (!IsLoad)
-    extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
-
-  const LLT OffsetTy =
-      LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
-  MachineFunction &MF = *MI.getMF();
-
-  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
-    unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
-    unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
-    unsigned NewAddrReg = 0;
-    MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
-    MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
-        MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
-        NarrowTy.getSizeInBits() / 8, Alignment);
-    if (IsLoad) {
-      unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
-      NarrowRegs.push_back(Dst);
-      MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
-    } else {
-      MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+  unsigned TotalSize = ValTy.getSizeInBits();
+
+  // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each element
+  // of ValRegs should be PartTy. Returns the next offset that needs to be
+  // handled.
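+  // (Illustration: splitting a 96-bit value with NarrowTy = <2 x s32>, the
+  // first call covers bits [0, 64) and returns 64; a second call with the
+  // s32 leftover type then handles the remaining 32 bits.)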
+  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
+                             unsigned Offset) -> unsigned {
+    MachineFunction &MF = MIRBuilder.getMF();
+    unsigned PartSize = PartTy.getSizeInBits();
+    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+         Offset += PartSize, ++Idx) {
+      unsigned ByteSize = PartSize / 8;
+      unsigned ByteOffset = Offset / 8;
+      unsigned NewAddrReg = 0;
+
+      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+      MachineMemOperand *NewMMO =
+        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
+      if (IsLoad) {
+        unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
+        ValRegs.push_back(Dst);
+        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
+      } else {
+        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
+      }
     }
-  }
+
+    return Offset;
+  };
+
+  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+  // Handle the rest of the register if this isn't an even type breakdown.
+  if (LeftoverTy.isValid())
+    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+
   if (IsLoad) {
-    if (NarrowTy.isVector())
-      MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
-    else
-      MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
+    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+                LeftoverTy, NarrowLeftoverRegs);
   }
+
   MI.eraseFromParent();
   return Legalized;
 }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -294,6 +294,15 @@
         [](const LegalityQuery &Query) {
           return std::make_pair(0, LLT::scalar(32));
         })
+
+    .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        return (MemSize == 96) &&
+               ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
+      },
+      [=](const LegalityQuery &Query) {
+        return std::make_pair(0, V2S32);
+      })
     .legalIf([=, &ST](const LegalityQuery &Query) {
         const LLT &Ty0 = Query.Types[0];
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
 
 ---
 name: test_load_global_i32
@@ -7,13 +8,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-LABEL: name: test_load_global_i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_global_i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -23,13 +27,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_i64
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-LABEL: name: test_load_global_i64
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_global_i64
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -39,13 +46,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-LABEL: name: test_load_global_p1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-LABEL: name: test_load_global_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -55,30 +65,35 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; SI-LABEL: name: test_load_global_p4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-LABEL: name: test_load_global_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
-
 ---
 name: test_load_global_p3
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p3
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](p3)
+    ; SI-LABEL: name: test_load_global_p3
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-LABEL: name: test_load_global_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -88,13 +103,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-LABEL: name: test_load_global_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-LABEL: name: test_load_global_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -105,10 +123,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v2s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; SI-LABEL: name: test_load_global_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-LABEL: name: test_load_global_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -120,13 +142,22 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v3i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; SI-LABEL: name: test_load_global_v3i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI-LABEL: name: test_load_global_v3i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
-
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
 
@@ -136,14 +167,18 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -153,14 +188,18 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -170,14 +209,18 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -187,13 +230,42 @@
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
-
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
 ...
+
+---
+name: test_load_global_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_load_global_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 16, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, align 8, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI-LABEL: name: test_load_global_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -1,16 +1,20 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
 ---
 name: test_store_global_i32
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -22,10 +26,14 @@
   bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_i64
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i64
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i64
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -37,10 +45,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_p1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -52,10 +64,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_p4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -67,10 +83,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_p3
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p3
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -82,10 +102,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -97,10 +121,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_v2s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -112,10 +140,19 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
 
-    ; CHECK-LABEL: name: test_store_global_v3s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CHECK: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store 12, align 4, addrspace 1)
@@ -127,11 +164,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s8
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s8
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s8
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 1, addrspace 1)
@@ -143,11 +185,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 1, addrspace 1)
@@ -159,11 +206,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -175,11 +227,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
 
-    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 1, addrspace 1)