Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -110,7 +110,7 @@ ArrayRef<LLT> Types; struct MemDesc { - uint64_t SizeInBits; + LLT MemoryTy; uint64_t AlignInBits; AtomicOrdering Ordering; }; @@ -196,13 +196,12 @@ struct TypePairAndMemDesc { LLT Type0; LLT Type1; - uint64_t MemSize; + LLT MemTy; uint64_t Align; bool operator==(const TypePairAndMemDesc &Other) const { return Type0 == Other.Type0 && Type1 == Other.Type1 && - Align == Other.Align && - MemSize == Other.MemSize; + Align == Other.Align && MemTy == Other.MemTy; } /// \returns true if this memory access is legal for the access described @@ -210,7 +209,9 @@ bool isCompatible(const TypePairAndMemDesc &Other) const { return Type0 == Other.Type0 && Type1 == Other.Type1 && Align >= Other.Align && - MemSize == Other.MemSize; + // FIXME: This perhaps should be stricter, but the current legality + // rules are written only considering the size. + MemTy.getSizeInBits() == Other.MemTy.getSizeInBits(); } }; Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -489,7 +489,7 @@ // Check for legality. if (LI) { LegalityQuery::MemDesc MMDesc; - MMDesc.SizeInBits = MMO.getSizeInBits(); + MMDesc.MemoryTy = MMO.getMemoryType(); MMDesc.AlignInBits = MMO.getAlign().value() * 8; MMDesc.Ordering = MMO.getOrdering(); LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); @@ -3632,7 +3632,7 @@ Register Ptr = LowestIdxLoad->getOperand(1).getReg(); const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); LegalityQuery::MemDesc MMDesc; - MMDesc.SizeInBits = WideMemSizeInBits; + MMDesc.MemoryTy = Ty; MMDesc.AlignInBits = MMO.getAlign().value() * 8; MMDesc.Ordering = MMO.getOrdering(); if (!isLegalOrBeforeLegalizer( Index: llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -55,7 +55,7 @@ SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit; return [=](const LegalityQuery &Query) { TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], - Query.MMODescrs[MMOIdx].SizeInBits, + Query.MMODescrs[MMOIdx].MemoryTy, Query.MMODescrs[MMOIdx].AlignInBits}; return llvm::any_of(TypesAndMemDesc, [=](const TypePairAndMemDesc &Entry) -> bool { @@ -176,7 +176,7 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { return [=](const LegalityQuery &Query) { - return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8); + return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes()); }; } Index: llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -88,7 +88,7 @@ OS << Opcode << ", MMOs={"; for (const auto &MMODescr : MMODescrs) { - OS << MMODescr.SizeInBits << ", "; + OS << MMODescr.MemoryTy << ", "; } OS << "}"; @@ -352,8 +352,8 @@ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; for (const auto &MMO : MI.memoperands()) - MemDescrs.push_back({8 * MMO->getSize() /* in bits */, - 8 * MMO->getAlign().value(), MMO->getOrdering()}); +
MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(), + MMO->getOrdering()}); return getAction({MI.getOpcode(), Types, MemDescrs}); } Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -252,15 +252,15 @@ getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}, - {s32, p0, 32, 8}, - {s64, p0, 8, 2}, - {s64, p0, 16, 2}, - {s64, p0, 32, 4}, - {s64, p0, 64, 8}, - {p0, p0, 64, 8}, - {v2s32, p0, 64, 8}}) + .legalForTypesWithMemDesc({{s32, p0, s8, 8}, + {s32, p0, s16, 8}, + {s32, p0, s32, 8}, + {s64, p0, s8, 2}, + {s64, p0, s16, 2}, + {s64, p0, s32, 4}, + {s64, p0, s64, 8}, + {p0, p0, s64, 8}, + {v2s32, p0, s64, 8}}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) // TODO: We could support sum-of-pow2's but the lowering code doesn't know @@ -278,34 +278,34 @@ }; getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s8, p0, 8, 8}, - {s16, p0, 16, 8}, - {s32, p0, 32, 8}, - {s64, p0, 64, 8}, - {p0, p0, 64, 8}, - {s128, p0, 128, 8}, - {v8s8, p0, 64, 8}, - {v16s8, p0, 128, 8}, - {v4s16, p0, 64, 8}, - {v8s16, p0, 128, 8}, - {v2s32, p0, 64, 8}, - {v4s32, p0, 128, 8}, - {v2s64, p0, 128, 8}}) + .legalForTypesWithMemDesc({{s8, p0, s8, 8}, + {s16, p0, s16, 8}, + {s32, p0, s32, 8}, + {s64, p0, s64, 8}, + {p0, p0, s64, 8}, + {s128, p0, s128, 8}, + {v8s8, p0, s64, 8}, + {v16s8, p0, s128, 8}, + {v4s16, p0, s64, 8}, + {v8s16, p0, s128, 8}, + {v2s32, p0, s64, 8}, + {v4s32, p0, s128, 8}, + {v2s64, p0, s128, 8}}) // These extends are also legal - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) + .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}}) .clampScalar(0, s8, s64) .lowerIfMemSizeNotPow2() .widenScalarToNextPow2(0) .narrowScalarIf([=](const LegalityQuery &Query) { // Clamp extending load results to 32-bits. 
return Query.Types[0].isScalar() && - Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits && - Query.Types[0].getSizeInBits() > 32; + Query.Types[0] != Query.MMODescrs[0].MemoryTy && + Query.Types[0].getSizeInBits() > 32; }, changeTo(0, s32)) // Lower any any-extending loads left into G_ANYEXT and G_LOAD .lowerIf([=](const LegalityQuery &Query) { - return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; + return Query.Types[0] != Query.MMODescrs[0].MemoryTy; }) .clampMaxNumElements(0, s8, 16) .clampMaxNumElements(0, s16, 8) @@ -314,31 +314,31 @@ .customIf(IsPtrVecPred); getActionDefinitionsBuilder(G_STORE) - .legalForTypesWithMemDesc({{s8, p0, 8, 8}, - {s16, p0, 8, 8}, // truncstorei8 from s16 - {s32, p0, 8, 8}, // truncstorei8 from s32 - {s64, p0, 8, 8}, // truncstorei8 from s64 - {s16, p0, 16, 8}, - {s32, p0, 16, 8}, // truncstorei16 from s32 - {s64, p0, 16, 8}, // truncstorei16 from s64 - {s32, p0, 8, 8}, - {s32, p0, 16, 8}, - {s32, p0, 32, 8}, - {s64, p0, 64, 8}, - {s64, p0, 32, 8}, // truncstorei32 from s64 - {p0, p0, 64, 8}, - {s128, p0, 128, 8}, - {v16s8, p0, 128, 8}, - {v8s8, p0, 64, 8}, - {v4s16, p0, 64, 8}, - {v8s16, p0, 128, 8}, - {v2s32, p0, 64, 8}, - {v4s32, p0, 128, 8}, - {v2s64, p0, 128, 8}}) + .legalForTypesWithMemDesc({{s8, p0, s8, 8}, + {s16, p0, s8, 8}, // truncstorei8 from s16 + {s32, p0, s8, 8}, // truncstorei8 from s32 + {s64, p0, s8, 8}, // truncstorei8 from s64 + {s16, p0, s16, 8}, + {s32, p0, s16, 8}, // truncstorei16 from s32 + {s64, p0, s16, 8}, // truncstorei16 from s64 + {s32, p0, s8, 8}, + {s32, p0, s16, 8}, + {s32, p0, s32, 8}, + {s64, p0, s64, 8}, + {s64, p0, s32, 8}, // truncstorei32 from s64 + {p0, p0, s64, 8}, + {s128, p0, s128, 8}, + {v16s8, p0, s128, 8}, + {v8s8, p0, s64, 8}, + {v4s16, p0, s64, 8}, + {v8s16, p0, s128, 8}, + {v2s32, p0, s64, 8}, + {v4s32, p0, s128, 8}, + {v2s64, p0, s128, 8}}) .clampScalar(0, s8, s64) .lowerIf([=](const LegalityQuery &Query) { return Query.Types[0].isScalar() && - Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; + Query.Types[0] != Query.MMODescrs[0].MemoryTy; }) // Maximum: sN * k = 128 .clampMaxNumElements(0, s8, 16) Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -228,7 +228,7 @@ return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; return !Ty.isVector() && Ty.getSizeInBits() > 32 && - Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits(); + Query.MMODescrs[0].MemoryTy.getSizeInBits() < Ty.getSizeInBits(); }; } @@ -268,7 +268,7 @@ const bool IsLoad = Query.Opcode != AMDGPU::G_STORE; unsigned RegSize = Ty.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); unsigned AlignBits = Query.MMODescrs[0].AlignInBits; unsigned AS = Query.Types[1].getAddressSpace(); @@ -357,23 +357,28 @@ /// Return true if a load or store of the type should be lowered with a bitcast /// to a different type. 
static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty, - const unsigned MemSizeInBits) { + const LLT MemTy) { + const unsigned MemSizeInBits = MemTy.getSizeInBits(); const unsigned Size = Ty.getSizeInBits(); if (Size != MemSizeInBits) return Size <= 32 && Ty.isVector(); if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty)) return true; - return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) && + + // Don't try to handle bitcasting vector ext loads for now. + return Ty.isVector() && (!MemTy.isVector() || MemTy == Ty) && + (Size <= 32 || isRegisterSize(Size)) && !isRegisterVectorElementType(Ty.getElementType()); } /// Return true if we should legalize a load by widening an odd-sized memory /// access up to the alignment. Note that in this case the memory access itself /// changes, not the size of the result register. -static bool shouldWidenLoad(const GCNSubtarget &ST, unsigned SizeInBits, +static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy, unsigned AlignInBits, unsigned AddrSpace, unsigned Opcode) { + unsigned SizeInBits = MemoryTy.getSizeInBits(); // We don't want to widen cases that are naturally legal. if (isPowerOf2_32(SizeInBits)) return false; @@ -409,7 +414,7 @@ if (Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic) return false; - return shouldWidenLoad(ST, Query.MMODescrs[0].SizeInBits, + return shouldWidenLoad(ST, Query.MMODescrs[0].MemoryTy, Query.MMODescrs[0].AlignInBits, Query.Types[1].getAddressSpace(), Opcode); } @@ -1044,7 +1049,7 @@ const LLT DstTy = Query.Types[0]; // Split vector extloads. - unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); unsigned AlignBits = Query.MMODescrs[0].AlignInBits; if (MemSize < DstTy.getSizeInBits()) @@ -1093,32 +1098,32 @@ auto &Actions = getActionDefinitionsBuilder(Op); // Explicitly list some common cases. // TODO: Does this help compile time at all?
- Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32}, - {V2S32, GlobalPtr, 64, GlobalAlign32}, - {V4S32, GlobalPtr, 128, GlobalAlign32}, - {S64, GlobalPtr, 64, GlobalAlign32}, - {V2S64, GlobalPtr, 128, GlobalAlign32}, - {V2S16, GlobalPtr, 32, GlobalAlign32}, - {S32, GlobalPtr, 8, GlobalAlign8}, - {S32, GlobalPtr, 16, GlobalAlign16}, - - {S32, LocalPtr, 32, 32}, - {S64, LocalPtr, 64, 32}, - {V2S32, LocalPtr, 64, 32}, - {S32, LocalPtr, 8, 8}, - {S32, LocalPtr, 16, 16}, - {V2S16, LocalPtr, 32, 32}, - - {S32, PrivatePtr, 32, 32}, - {S32, PrivatePtr, 8, 8}, - {S32, PrivatePtr, 16, 16}, - {V2S16, PrivatePtr, 32, 32}, - - {S32, ConstantPtr, 32, GlobalAlign32}, - {V2S32, ConstantPtr, 64, GlobalAlign32}, - {V4S32, ConstantPtr, 128, GlobalAlign32}, - {S64, ConstantPtr, 64, GlobalAlign32}, - {V2S32, ConstantPtr, 32, GlobalAlign32}}); + Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32}, + {V2S32, GlobalPtr, V2S32, GlobalAlign32}, + {V4S32, GlobalPtr, V4S32, GlobalAlign32}, + {S64, GlobalPtr, S64, GlobalAlign32}, + {V2S64, GlobalPtr, V2S64, GlobalAlign32}, + {V2S16, GlobalPtr, V2S16, GlobalAlign32}, + {S32, GlobalPtr, S8, GlobalAlign8}, + {S32, GlobalPtr, S16, GlobalAlign16}, + + {S32, LocalPtr, S32, 32}, + {S64, LocalPtr, S64, 32}, + {V2S32, LocalPtr, V2S32, 32}, + {S32, LocalPtr, S8, 8}, + {S32, LocalPtr, S16, 16}, + {V2S16, LocalPtr, S32, 32}, + + {S32, PrivatePtr, S32, 32}, + {S32, PrivatePtr, S8, 8}, + {S32, PrivatePtr, S16, 16}, + {V2S16, PrivatePtr, S32, 32}, + + {S32, ConstantPtr, S32, GlobalAlign32}, + {V2S32, ConstantPtr, V2S32, GlobalAlign32}, + {V4S32, ConstantPtr, V4S32, GlobalAlign32}, + {S64, ConstantPtr, S64, GlobalAlign32}, + {V2S32, ConstantPtr, V2S32, GlobalAlign32}}); Actions.legalIf( [=](const LegalityQuery &Query) -> bool { return isLoadStoreLegal(ST, Query); @@ -1140,7 +1145,7 @@ Actions.bitcastIf( [=](const LegalityQuery &Query) -> bool { return shouldBitcastLoadStoreType(ST, Query.Types[0], - Query.MMODescrs[0].SizeInBits); + Query.MMODescrs[0].MemoryTy); }, bitcastToRegisterType(0)); if (!IsStore) { @@ -1163,7 +1168,7 @@ const LLT PtrTy = Query.Types[1]; const unsigned DstSize = DstTy.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); // Split extloads. if (DstSize > MemSize) @@ -1211,7 +1216,8 @@ // FIXME: 3 element stores scalarized on SI // Split if it's too large for the address space. - if (Query.MMODescrs[0].SizeInBits > MaxSize) { + unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); + if (MemSize > MaxSize) { unsigned NumElts = DstTy.getNumElements(); unsigned EltSize = EltTy.getSizeInBits(); @@ -1220,7 +1226,7 @@ 0, LLT::scalarOrVector(MaxSize / EltSize, EltTy)); } - unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize; + unsigned NumPieces = MemSize / MaxSize; // FIXME: Refine when odd breakdowns handled // The scalars will need to be re-legalized. @@ -1233,7 +1239,6 @@ } // FIXME: We could probably handle weird extending loads better. 
- unsigned MemSize = Query.MMODescrs[0].SizeInBits; if (DstTy.getSizeInBits() > MemSize) return std::make_pair(0, EltTy); @@ -1267,14 +1272,14 @@ } auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8}, - {S32, GlobalPtr, 16, 2 * 8}, - {S32, LocalPtr, 8, 8}, - {S32, LocalPtr, 16, 16}, - {S32, PrivatePtr, 8, 8}, - {S32, PrivatePtr, 16, 16}, - {S32, ConstantPtr, 8, 8}, - {S32, ConstantPtr, 16, 2 * 8}}) + .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8}, + {S32, GlobalPtr, S16, 2 * 8}, + {S32, LocalPtr, S8, 8}, + {S32, LocalPtr, S16, 16}, + {S32, PrivatePtr, S8, 8}, + {S32, PrivatePtr, S16, 16}, + {S32, ConstantPtr, S8, 8}, + {S32, ConstantPtr, S16, 2 * 8}}) .legalIf( [=](const LegalityQuery &Query) -> bool { return isLoadStoreLegal(ST, Query); @@ -1282,7 +1287,7 @@ if (ST.hasFlatAddressSpace()) { ExtLoads.legalForTypesWithMemDesc( - {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}}); + {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}}); } // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to @@ -2436,12 +2441,13 @@ MachineMemOperand *MMO = *MI.memoperands_begin(); const unsigned ValSize = ValTy.getSizeInBits(); - const unsigned MemSize = 8 * MMO->getSize(); + const LLT MemTy = MMO->getMemoryType(); const Align MemAlign = MMO->getAlign(); + const unsigned MemSize = MemTy.getSizeInBits(); const unsigned AlignInBits = 8 * MemAlign.value(); // Widen non-power-of-2 loads to the alignment if needed - if (shouldWidenLoad(ST, MemSize, AlignInBits, AddrSpace, MI.getOpcode())) { + if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) { const unsigned WideMemSize = PowerOf2Ceil(MemSize); // This was already the correct extending load result type, so just adjust @@ -4556,7 +4562,7 @@ Observer.changingInstr(MI); - if (shouldBitcastLoadStoreType(ST, Ty, Size)) { + if (shouldBitcastLoadStoreType(ST, Ty, LLT::scalar(Size))) { Ty = getBitcastRegisterType(Ty); Helper.bitcastDst(MI, Ty, 0); Dst = MI.getOperand(0).getReg(); Index: llvm/lib/Target/ARM/ARMInstructionSelector.cpp =================================================================== --- llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -1096,24 +1096,6 @@ if (NewOpc == G_LOAD || NewOpc == G_STORE) return false; - if (ValSize == 1 && NewOpc == Opcodes.STORE8) { - // Before storing a 1-bit value, make sure to clear out any unneeded bits. - Register OriginalValue = I.getOperand(0).getReg(); - - Register ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass); - I.getOperand(0).setReg(ValueToStore); - - auto InsertBefore = I.getIterator(); - auto AndI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.AND)) - .addDef(ValueToStore) - .addUse(OriginalValue) - .addImm(1) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - if (!constrainSelectedInstRegOperands(*AndI, TII, TRI, RBI)) - return false; - } - I.setDesc(TII.get(NewOpc)); if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH) Index: llvm/lib/Target/ARM/ARMLegalizerInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -149,11 +149,10 @@ // We're keeping these builders around because we'll want to add support for // floating point to them. 
auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .legalForTypesWithMemDesc({{s1, p0, 8, 8}, - {s8, p0, 8, 8}, - {s16, p0, 16, 8}, - {s32, p0, 32, 8}, - {p0, p0, 32, 8}}) + .legalForTypesWithMemDesc({{s8, p0, s8, 8}, + {s16, p0, s16, 8}, + {s32, p0, s32, 8}, + {p0, p0, p0, 8}}) .unsupportedIfMemSizeNotPow2(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); @@ -176,7 +175,7 @@ .legalFor({s32, s64}); LoadStoreBuilder - .legalForTypesWithMemDesc({{s64, p0, 64, 32}}) + .legalForTypesWithMemDesc({{s64, p0, s64, 32}}) .maxScalar(0, s32); PhiBuilder.legalFor({s64}); @@ -221,6 +220,9 @@ .libcallForCartesianProduct({s32, s64}, {s32}); } + // Just expand whatever loads and stores are left. + LoadStoreBuilder.lower(); + if (!ST.useSoftFloat() && ST.hasVFP4Base()) getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64}); else Index: llvm/lib/Target/Mips/MipsLegalizerInfo.cpp =================================================================== --- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -37,7 +37,7 @@ static bool CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query, std::initializer_list<TypesAndMemOps> SupportedValues) { - unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits; + unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); // Non-power-of-two memory access is never legal. if (!isPowerOf2_64(QueryMemSize)) @@ -67,6 +67,8 @@ using namespace TargetOpcode; const LLT s1 = LLT::scalar(1); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); const LLT v16s8 = LLT::vector(16, 8); @@ -125,13 +127,13 @@ return false; unsigned Size = Query.Types[0].getSizeInBits(); - unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits; + unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); assert(QueryMemSize <= Size && "Scalar can't hold MemSize"); if (Size > 64 || QueryMemSize > 64) return false; - if (!isPowerOf2_64(Query.MMODescrs[0].SizeInBits)) + if (!isPowerOf2_64(Query.MMODescrs[0].MemoryTy.getSizeInBits())) return true; if (!ST.systemSupportsUnalignedAccess() && @@ -143,7 +145,8 @@ return false; }) - .minScalar(0, s32); + .minScalar(0, s32) + .lower(); getActionDefinitionsBuilder(G_IMPLICIT_DEF) .legalFor({s32, s64}); @@ -155,8 +158,8 @@ .legalFor({{s64, s32}}); getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD}) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}}) + .legalForTypesWithMemDesc({{s32, p0, s8, 8}, + {s32, p0, s16, 8}}) .clampScalar(0, s32, s32); getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -8,8 +8,9 @@ ; CHECK-LABEL: name: test_load ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s1)) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8)) + ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) @@ -63,11 +64,12 @@ ; CHECK-LABEL: name: test_store ;
CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s1)) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8)) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY]](p0) :: (store (s8)) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir @@ -11,8 +11,9 @@ ; CHECK-LABEL: name: test_load_trunc ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s10)) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s16)) + ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 10 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s16) ; CHECK: RET_ReallyLR implicit [[TRUNC]](s1) %0:_(p0) = G_FRAME_INDEX %stack.0 %1:_(s10) = G_LOAD %0(p0) :: (load (s10)) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -12,21 +12,21 @@ ; CI-LABEL: name: test_load_constant_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_constant_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_constant_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -45,21 +45,21 @@ ; CI-LABEL: name: test_load_constant_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_constant_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_constant_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -12,21 +12,21 @@ ; CI-LABEL: name: test_load_flat_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_flat_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_flat_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -45,21 +45,21 @@ ; CI-LABEL: name: test_load_flat_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2)) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_flat_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2)) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_flat_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p0) :: (load (s2)) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -19,42 +19,42 @@ ; SI-LABEL: name: test_load_global_s1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-HSA-LABEL: name: test_load_global_s1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-HSA: $vgpr0 = COPY [[AND]](s32) ; CI-MESA-LABEL: name: test_load_global_s1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-MESA: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_global_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -73,42 +73,42 @@ ; SI-LABEL: name: test_load_global_s2_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; SI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-HSA-LABEL: name: test_load_global_s2_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-HSA: $vgpr0 = COPY [[AND]](s32) ; CI-MESA-LABEL: name: test_load_global_s2_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-MESA: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_global_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s2_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s2_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -13,28 +13,28 @@ ; SI-LABEL: name: test_load_private_s1_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_private_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_private_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_private_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] @@ -53,28 +53,28 @@ ; SI-LABEL: name: test_load_private_s2_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_private_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_private_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_private_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -23,12 +23,14 @@ ; GFX8-LABEL: name: test_sextload_global_i32_i1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1 + ; GFX8: $vgpr0 = COPY 
[[SEXT_INREG]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1 + ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1) $vgpr0 = COPY %1 @@ -42,12 +44,14 @@ ; GFX8-LABEL: name: test_sextload_global_i32_i7 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7 + ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i7 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7 + ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1) $vgpr0 = COPY %1 @@ -79,12 +83,14 @@ ; GFX8-LABEL: name: test_sextload_global_i32_i30 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30 + ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i30 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30 + ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1) $vgpr0 = COPY %1 @@ -98,12 +104,14 @@ ; GFX8-LABEL: name: test_sextload_global_i32_i31 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31 + ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i31 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31 + ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1) $vgpr0 = COPY %1 Index: 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -17,28 +17,32 @@ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; CI-LABEL: name: test_store_global_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_store_global_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; GFX9-LABEL: name: test_store_global_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; GFX9: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s1) = G_TRUNC %1 @@ -55,22 +59,30 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; CI-LABEL: name: test_store_global_s7_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_store_global_s7_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; GFX9-LABEL: name: 
test_store_global_s7_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s7) = G_TRUNC %1 @@ -435,22 +447,30 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; CI-LABEL: name: test_store_global_s25_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; VI-LABEL: name: test_store_global_s25_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; GFX9-LABEL: name: test_store_global_s25_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s25) = G_TRUNC %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -275,14 +275,16 @@ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_store_global_i1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY 
$vgpr2 %2:_(s1) = G_TRUNC %1 @@ -970,7 +972,9 @@ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; SI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -996,7 +1000,9 @@ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 @@ -1039,7 +1045,9 @@ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; SI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -1065,7 +1073,9 @@ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -23,12 +23,14 @@ ; GFX8-LABEL: name: test_zextload_global_i32_i1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY 
$vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1) $vgpr0 = COPY %1 @@ -42,12 +44,14 @@ ; GFX8-LABEL: name: test_zextload_global_i32_i7 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i7 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1) $vgpr0 = COPY %1 @@ -80,12 +84,14 @@ ; GFX8-LABEL: name: test_zextload_global_i32_i30 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i30 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1) $vgpr0 = COPY %1 @@ -99,12 +105,14 @@ ; GFX8-LABEL: name: test_zextload_global_i32_i31 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i31 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1) $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -3247,11 +3247,13 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 ; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: 
v_subrev_u32_e32 v3, vcc, s8, v3 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v3 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_byte v[0:1], v3 +; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: sdivrem_i3: @@ -3292,12 +3294,14 @@ ; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 ; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] -; GFX9-NEXT: global_store_byte v2, v1, s[2:3] +; GFX9-NEXT: v_and_b32_e32 v0, 7, v1 +; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_i3: @@ -3307,40 +3311,42 @@ ; GFX10-NEXT: s_lshr_b32 s1, s0, 8 ; GFX10-NEXT: s_bfe_i32 s0, s0, 0x30000 ; GFX10-NEXT: s_bfe_i32 s1, s1, 0x30000 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 +; GFX10-NEXT: s_ashr_i32 s7, s0, 31 ; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_add_i32 s0, s0, s8 +; GFX10-NEXT: s_add_i32 s0, s0, s7 ; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 -; GFX10-NEXT: s_xor_b32 s7, s1, s6 -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX10-NEXT: s_sub_i32 s1, 0, s7 +; GFX10-NEXT: s_xor_b32 s0, s0, s7 +; GFX10-NEXT: s_xor_b32 s1, s1, s6 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GFX10-NEXT: s_sub_i32 s2, 0, s1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX10-NEXT: s_xor_b32 s4, s8, s6 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: s_xor_b32 s4, s7, s6 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1 ; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s7, v1 +; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_byte v2, v0, s[0:1] ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] @@ -3352,11 +3358,155 @@ ret void } -; FIXME: Reenable test -; define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; %div = sdiv i27 %x, %y -; store i27 %div, i27 addrspace(1)* %out0 -; %rem = 
srem i27 %x, %y -; store i27 %rem, i27 addrspace(1)* %out1 -; ret void -; } +define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; GFX8-LABEL: sdivrem_i27: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX8-NEXT: s_mov_b32 s9, 0x7ffffff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX8-NEXT: s_ashr_i32 s2, s1, 31 +; GFX8-NEXT: s_add_i32 s1, s1, s2 +; GFX8-NEXT: s_xor_b32 s3, s1, s2 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX8-NEXT: s_sub_i32 s1, 0, s3 +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX8-NEXT: s_ashr_i32 s8, s0, 31 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: s_add_i32 s0, s0, s8 +; GFX8-NEXT: s_xor_b32 s0, s0, s8 +; GFX8-NEXT: s_xor_b32 s2, s8, s2 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s0, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX8-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s2, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s8, v1 +; GFX8-NEXT: v_and_b32_e32 v3, s9, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: flat_store_dword v[0:1], v3 +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: v_and_b32_e32 v2, s9, v2 +; GFX8-NEXT: v_mov_b32_e32 v1, s7 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: sdivrem_i27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX9-NEXT: s_ashr_i32 s6, s1, 31 +; GFX9-NEXT: s_add_i32 s1, s1, s6 +; GFX9-NEXT: s_xor_b32 s7, s1, s6 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX9-NEXT: s_sub_i32 s1, 0, s7 +; GFX9-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX9-NEXT: s_ashr_i32 s8, s0, 31 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_add_i32 s0, s0, s8 +; GFX9-NEXT: s_xor_b32 s9, s0, s8 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: s_xor_b32 s5, s8, s6 +; GFX9-NEXT: s_mov_b32 s4, 0x7ffffff +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, s5, v0 +; GFX9-NEXT: v_subrev_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sdivrem_i27: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX10-NEXT: s_ashr_i32 s6, s1, 31 +; GFX10-NEXT: s_ashr_i32 s7, s0, 31 +; GFX10-NEXT: s_add_i32 s1, s1, s6 +; GFX10-NEXT: s_add_i32 s0, s0, s7 +; GFX10-NEXT: s_xor_b32 s1, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s7 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GFX10-NEXT: s_sub_i32 s2, 0, s1 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: s_xor_b32 s4, s7, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s7, v1 +; GFX10-NEXT: s_mov_b32 s4, 0x7ffffff +; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v2, v0, s[0:1] +; GFX10-NEXT: global_store_dword v2, v1, s[2:3] +; GFX10-NEXT: s_endpgm + %div = sdiv i27 %x, %y + store i27 %div, i27 addrspace(1)* %out0 + %rem = srem i27 %x, %y + store i27 %rem, i27 addrspace(1)* %out1 + ret void +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -2565,11 +2565,13 @@ ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v3 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_byte v[0:1], v3 +; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: udivrem_i3: @@ -2601,10 +2603,12 @@ ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] -; GFX9-NEXT: global_store_byte v2, v1, s[2:3] +; GFX9-NEXT: v_and_b32_e32 v0, 7, v1 +; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_i3: @@ -2635,8 +2639,10 @@ ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v1 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s6, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_byte v2, v0, s[0:1] ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] @@ -2648,11 +2654,123 @@ ret void } -; FIXME: Reenable test -; define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; %div = udiv i27 %x, %y -; store i27 %div, i27 addrspace(1)* %out0 -; %rem = urem i27 %x, %y -; store i27 %rem, i27 addrspace(1)* %out1 -; ret void -; } +define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; GFX8-LABEL: udivrem_i27: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX8-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s7, s1, s6 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX8-NEXT: s_sub_i32 s1, 0, s7 +; GFX8-NEXT: s_and_b32 s8, s0, s6 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, s8, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s8, v3 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 +; GFX8-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, s6, v3 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: udivrem_i27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX9-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_and_b32 s7, s1, s6 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX9-NEXT: s_sub_i32 s1, 0, s7 +; GFX9-NEXT: s_and_b32 s8, s0, s6 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s8, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s8, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 
vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_and_b32_e32 v0, s6, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: v_and_b32_e32 v0, s6, v1 +; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: udivrem_i27: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX10-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_and_b32 s7, s1, s6 +; GFX10-NEXT: s_and_b32 s0, s0, s6 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX10-NEXT: s_sub_i32 s1, 0, s7 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_and_b32_e32 v0, s6, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s6, v1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v2, v0, s[0:1] +; GFX10-NEXT: global_store_dword v2, v1, s[2:3] +; GFX10-NEXT: s_endpgm + %div = udiv i27 %x, %y + store i27 %div, i27 addrspace(1)* %out0 + %rem = urem i27 %x, %y + store i27 %rem, i27 addrspace(1)* %out1 + ret void +} Index: llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -938,7 +938,7 @@ ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 [[ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[LDRi12_]] ; CHECK: [[ADDri1:%[0-9]+]]:gpr = ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) + ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s8)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg @@ -950,9 +950,9 @@ %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load (s1)) + %3(s8) = G_LOAD %2(p0) :: (load (s8)) - %4(s32) = G_ANYEXT %3(s1) + %4(s32) = G_ANYEXT %3(s8) $r0 = COPY %4 @@ -977,8 +977,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] - ; CHECK: [[ANDri:%[0-9]+]]:gprnopc = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRBi12 
[[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) @@ -987,14 +985,10 @@ %3(s32) = COPY $r1 - %4(s1) = G_TRUNC %3(s32) - %1(s8) = G_TRUNC %3(s32) %2(s16) = G_TRUNC %3(s32) - G_STORE %4(s1), %0(p0) :: (store (s1)) - G_STORE %1(s8), %0(p0) :: (store (s8)) G_STORE %2(s16), %0(p0) :: (store (s16)) Index: llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir +++ llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir @@ -41,14 +41,17 @@ ; CHECK: {{%[0-9]+}}:_(s8) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i8 - %3(s1) = G_CONSTANT i1 1 - G_STORE %3(s1), %4(p0) :: (store (s1)) + %3:_(s1) = G_CONSTANT i1 1 + %6:_(s32) = G_CONSTANT i32 99 + %7:_(s32) = G_SELECT %3, %0, %6 + G_STORE %7(s32), %4(p0) :: (store (s32)) ; CHECK-NOT: G_CONSTANT i1 ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 %5(p0) = G_CONSTANT i32 0 + G_STORE %5(p0), %4(p0) :: (store (p0)) ; CHECK: {{%[0-9]+}}:_(p0) = G_CONSTANT i32 0 Index: llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir +++ llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir @@ -143,20 +143,18 @@ regBankSelected: false selected: false tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.0: - liveins: $r0 + liveins: $r0, $r1 - %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - %2(s16) = G_SEXT %1(s1) + %0:_(p0) = COPY $r0 + %1:_(s32) = COPY $r1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %1, %2 + %4:_(s16) = G_SEXT %3(s1) ; G_SEXT from s1 to s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s16) = G_SEXT {{%[0-9]+}}(s1) - G_STORE %2(s16), %0(p0) :: (store (s16)) + G_STORE %4(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... --- @@ -167,20 +165,18 @@ regBankSelected: false selected: false tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.0: - liveins: $r0 + liveins: $r0, $r1 - %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - %2(s8) = G_ANYEXT %1 + %0:_(p0) = COPY $r0 + %1:_(s32) = COPY $r1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %1, %2 + %4:_(s8) = G_ANYEXT %3 ; G_ANYEXT from s1 to s8 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s8) = G_ANYEXT {{%[0-9]+}}(s1) - G_STORE %2(s8), %0(p0) :: (store (s8)) + G_STORE %4(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... 
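The ARM test changes above all stem from the same behavior shift: an s1 memory access is no longer selected as-is but widened to s8 during legalization (see the new test_load_store_s1 case in the next file), and a legality rule can now inspect the memory type directly because the query carries an LLT instead of a raw size. A minimal sketch of such a predicate, using only the MMODescrs/MemoryTy fields this patch introduces (the helper name is hypothetical, not an in-tree API):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;

// Matches any memory access narrower than a byte (e.g. an s1 load or
// store), so a rule set can route it to a widening action.
static LegalityPredicate isSubByteMemAccess(unsigned MMOIdx) {
  return [=](const LegalityQuery &Query) {
    LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
    return MemTy.getSizeInBits() < 8;
  };
}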
--- Index: llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple thumbv7-- -run-pass=legalizer %s -o - | FileCheck %s --- | define void @test_legal_loads_stores() { ret void } + define void @test_load_store_s1() { ret void } define void @test_load_from_stack() { ret void } define void @test_load_store_64_vfp() #0 { ret void } @@ -34,16 +35,14 @@ liveins: $r0 ; These are all legal, so we should find them unchanged in the output - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s1), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0) :: (store (s32)) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0) :: (store (s16)) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0) :: (store (s8)) ; CHECK-DAG: G_STORE {{%[0-9]+}}(p0), %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s16) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s8) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s1) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0(p0) + ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_LOAD %0(p0) :: (load (s32)) + ; CHECK-DAG: {{%[0-9]+}}:_(s16) = G_LOAD %0(p0) :: (load (s16)) + ; CHECK-DAG: {{%[0-9]+}}:_(s8) = G_LOAD %0(p0) :: (load (s8)) + ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0(p0) :: (load (p0)) %0(p0) = COPY $r0 %2(s32) = G_LOAD %0(p0) :: (load (s32)) G_STORE %2(s32), %0(p0) :: (store (s32)) @@ -51,12 +50,34 @@ G_STORE %3(s16), %0(p0) :: (store (s16)) %4(s8) = G_LOAD %0(p0) :: (load (s8)) G_STORE %4(s8), %0(p0) :: (store (s8)) - %5(s1) = G_LOAD %0(p0) :: (load (s1)) - G_STORE %5(s1), %0(p0) :: (store (s1)) %6(p0) = G_LOAD %0(p0) :: (load (p0)) G_STORE %6(p0), %0(p0) :: (store (p0)) BX_RET 14, $noreg ... + +--- +name: test_load_store_s1 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[LD:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8)) + ; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LD]] + ; CHECK: [[COPYONE:%[0-9]+]]:_(s32) = COPY [[ONE]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXT]], [[COPYONE]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]] + ; CHECK: G_STORE [[TRUNC]](s8), {{%[0-9]+}}(p0) :: (store (s8)) + %0:_(p0) = COPY $r0 + %5:_(s1) = G_LOAD %0(p0) :: (load (s1)) + G_STORE %5(s1), %0(p0) :: (store (s1)) + BX_RET 14, $noreg +... 
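The test_load_store_s1 case added above pins down the widened sequence the legalizer is expected to produce. A rough builder-level equivalent of that sequence, assuming a MachineIRBuilder B plus Ptr and MMO already in scope (illustrative names only, not the legalizer's actual code path):

// Load a whole byte, mask the value back down to its single meaningful
// bit, then store a whole byte again.
auto Load   = B.buildLoad(LLT::scalar(8), Ptr, *MMO);   // G_LOAD (s8)
auto Ext    = B.buildAnyExt(LLT::scalar(32), Load);     // G_ANYEXT
auto One    = B.buildConstant(LLT::scalar(32), 1);      // G_CONSTANT i32 1
auto Masked = B.buildAnd(LLT::scalar(32), Ext, One);    // G_AND
auto Trunc  = B.buildTrunc(LLT::scalar(8), Masked);     // G_TRUNC
B.buildStore(Trunc, Ptr, *MMO);                         // G_STORE (s8)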
--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack Index: llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -400,8 +400,7 @@ # CHECK: - { id: 2, class: gprb, preferred-register: '' } # CHECK: - { id: 3, class: gprb, preferred-register: '' } # CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } -# CHECK: - { id: 6, class: fprb, preferred-register: '' } +# CHECK: - { id: 5, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -415,12 +414,11 @@ bb.0: liveins: $r0 %0(p0) = COPY $r0 - %6(s64) = G_LOAD %0 :: (load (s64)) + %5(s64) = G_LOAD %0 :: (load (s64)) %1(s32) = G_LOAD %0 :: (load (s32)) %2(s16) = G_LOAD %0 :: (load (s16)) %3(s8) = G_LOAD %0 :: (load (s8)) - %4(s1) = G_LOAD %0 :: (load (s1)) - %5(p0) = G_LOAD %0 :: (load (p0)) + %4(p0) = G_LOAD %0 :: (load (p0)) BX_RET 14, $noreg, implicit $r0 ... @@ -435,10 +433,7 @@ # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } # CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } -# CHECK: - { id: 6, class: fprb, preferred-register: '' } - +# CHECK: - { id: 4, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -457,12 +452,8 @@ G_STORE %2(s16), %0 :: (store (s16)) %3(s8) = G_TRUNC %1(s32) G_STORE %3(s8), %0 :: (store (s8)) - %4(s1) = G_TRUNC %1(s32) - G_STORE %4(s1), %0 :: (store (s1)) - %5(p0) = COPY $r5 - G_STORE %5(p0), %0 :: (store (p0)) - %6(s64) = COPY $d6 - G_STORE %6(s64), %0 :: (store (s64)) + %4(s64) = COPY $d6 + G_STORE %4(s64), %0 :: (store (s64)) BX_RET 14, $noreg, implicit $r0 ... Index: llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir =================================================================== --- llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir +++ llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | - define void @test_s1() { ret void } define void @test_s8() { ret void } define void @test_s16() { ret void } define void @test_s32() { ret void } @@ -11,32 +10,6 @@ define void @test_load_from_stack() { ret void } ... --- -name: test_s1 -legalized: true -regBankSelected: true -selected: false -registers: - - { id: 0, class: gprb } - - { id: 1, class: gprb } -body: | - bb.0: - liveins: $r0 - - ; CHECK-LABEL: name: test_s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s1)) - ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[t2LDRBi12_]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) - ; CHECK: BX_RET 14 /* CC::al */, $noreg - %0(p0) = COPY $r0 - - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - - G_STORE %1(s1), %0(p0) :: (store (s1)) - - BX_RET 14, $noreg -... 
---- name: test_s8 legalized: true regBankSelected: true @@ -164,7 +137,7 @@ ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[t2ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRi12_]] ; CHECK: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s8)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[t2LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg @@ -176,9 +149,9 @@ %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load (s1)) + %3(s8) = G_LOAD %2(p0) :: (load (s8)) - %4(s32) = G_ANYEXT %3(s1) + %4(s32) = G_ANYEXT %3(s8) $r0 = COPY %4 Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir @@ -342,7 +342,8 @@ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) ; MIPS32: RetRA %0:_(s32) = COPY $a0 Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -446,7 +446,8 @@ ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C1]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) ; MIPS32: RetRA %0:_(s32) = COPY $a0 Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir @@ -62,11 +62,12 @@ ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s1) from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.py) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY]](p0) :: (store (s1) into %ir.px) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY]](p0) :: (store (s8) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll 
=================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -173,6 +173,7 @@ ; MIPS32-NEXT: addu $1, $4, $5 ; MIPS32-NEXT: sltu $2, $1, $5 ; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -187,6 +187,7 @@ ; MIPS32-NEXT: mul $1, $4, $5 ; MIPS32-NEXT: sltu $2, $zero, $2 ; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll @@ -28,6 +28,7 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lbu $1, 0($5) ; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sb $1, 0($4) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp =================================================================== --- llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp +++ llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp @@ -391,22 +391,22 @@ LegalizerInfo LI; auto &LegacyInfo = LI.getLegacyLegalizerInfo(); LI.getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s32, p0, 32, 32}}); + .legalForTypesWithMemDesc({{s32, p0, s32, 32}}); LegacyInfo.computeTables(); EXPECT_ACTION(Legal, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 32, AtomicOrdering::NotAtomic})); + s32, 32, AtomicOrdering::NotAtomic})); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 16, AtomicOrdering::NotAtomic })); + s32, 16, AtomicOrdering::NotAtomic })); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 8, AtomicOrdering::NotAtomic})); + s32, 8, AtomicOrdering::NotAtomic})); } // Test that the maximum supported alignment value isn't truncated @@ -417,18 +417,18 @@ LegalizerInfo LI; auto &LegacyInfo = LI.getLegacyLegalizerInfo(); LI.getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s32, p0, 32, MaxAlignInBits}}); + .legalForTypesWithMemDesc({{s32, p0, s32, MaxAlignInBits}}); LegacyInfo.computeTables(); EXPECT_ACTION(Legal, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, - LegalityQuery::MemDesc{32, + LegalityQuery::MemDesc{s32, MaxAlignInBits, AtomicOrdering::NotAtomic})); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 8, AtomicOrdering::NotAtomic })); + s32, 8, AtomicOrdering::NotAtomic })); } } Index: llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp =================================================================== --- llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp +++ llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp @@ -31,10 +31,11 @@ DefineLegalizerInfo(ALegalizer, { auto p0 = LLT::pointer(0, 64); + auto s8 = LLT::scalar(8); auto v2s8 = LLT::vector(2, 8); auto v2s16 = LLT::vector(2, 16); getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s16, 
p0, 8, 8}}) + .legalForTypesWithMemDesc({{s16, p0, s8, 8}}) .scalarize(0) .clampScalar(0, s16, s16); getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}});
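The unit-test updates above show the query shape after this change: the third element of each legalForTypesWithMemDesc tuple and the first MemDesc field are now LLTs rather than sizes in bits. A hypothetical extra check, written in the same style as the LegalizerInfoTest cases earlier in the patch, against the {{s16, p0, s8, 8}} rule defined just above (assumes that file's EXPECT_ACTION harness):

// An s16 result loaded via an s8 (extending) access is what the rule
// declares legal; the query now carries the memory type itself.
EXPECT_ACTION(Legal, 0, LLT(),
              LegalityQuery(G_LOAD, {LLT::scalar(16), LLT::pointer(0, 64)},
                            LegalityQuery::MemDesc{
                                LLT::scalar(8), 8, AtomicOrdering::NotAtomic}));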