Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -268,14 +268,12 @@ * ``LLVMConstSelect`` -Changes to the FastISel infrastructure --------------------------------------- - -* ... - -Changes to the DAG infrastructure ---------------------------------- +Changes to the CodeGen infrastructure +------------------------------------- +* ``llvm.memcpy``, ``llvm.memmove`` and ``llvm.memset`` are now + expanded into loops by default for targets which do not report that the + corresponding library function is available. Changes to the Metadata Info --------------------------------- Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -356,6 +356,10 @@ /// source/destination type and alignment and the number of bytes copied. InstructionCost getMemcpyCost(const Instruction *I) const; + /// \return The maximum memset / memcpy size in bytes that still makes it + /// profitable to inline the call. + int64_t getMaxMemIntrinsicInlineSizeThreshold() const; + /// \return The estimated number of case clusters when lowering \p 'SI'. /// \p JTSize Set a jump table size only when \p SI is suitable for a jump /// table. 
@@ -1673,6 +1677,7 @@ virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; virtual int getInlinerVectorBonusPercent() const = 0; virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; + virtual int64_t getMaxMemIntrinsicInlineSizeThreshold() const = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, @@ -2044,6 +2049,11 @@ InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } + + int64_t getMaxMemIntrinsicInlineSizeThreshold() const override { + return Impl.getMaxMemIntrinsicInlineSizeThreshold(); + } + InstructionCost getInstructionCost(const User *U, ArrayRef<const Value *> Operands, TargetCostKind CostKind) override { Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -77,6 +77,10 @@ return TTI::TCC_Expensive; } + int64_t getMaxMemIntrinsicInlineSizeThreshold() const { + return 64; + } + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than // this is best represented as control flow. 
Therefore, the default value N Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1035,6 +1035,10 @@ return Cost; } +int64_t TargetTransformInfo::getMaxMemIntrinsicInlineSizeThreshold() const { + return TTIImpl->getMaxMemIntrinsicInlineSizeThreshold(); +} + InstructionCost TargetTransformInfo::getArithmeticReductionCost( unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const { Index: llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp =================================================================== --- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -6,14 +6,16 @@ // //===----------------------------------------------------------------------===// // -// This pass implements IR lowering for the llvm.load.relative and llvm.objc.* -// intrinsics. +// This pass implements IR lowering for the llvm.memcpy, llvm.memmove, +// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/ObjCARCUtil.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -24,9 +26,44 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" using namespace llvm; +/// Threshold to leave statically sized memory intrinsic calls. Calls of known +/// size larger than this, or of unknown size, will be expanded by the pass. +/// Calls of known lower size will be left for expansion in codegen. 
+static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt( + "mem-intrinsic-expand-size", + cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(-1), + cl::Hidden); + +namespace { + +struct PreISelIntrinsicLowering { + const function_ref<TargetTransformInfo &(Function &)> LookupTTI; + const function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo; + + /// If this is true, leave memory intrinsic calls for replacement with a + /// library call later whenever TargetLibraryInfo reports the corresponding + /// function as available. If false, never defer to a library call. + const bool UseMemIntrinsicLibFunc; + + explicit PreISelIntrinsicLowering( + function_ref<TargetTransformInfo &(Function &)> LookupTTI_, + function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo_, + bool UseMemIntrinsicLibFunc_ = true) + : LookupTTI(LookupTTI_), LookupLibInfo(LookupLibInfo_), + UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {} + + static bool shouldExpandMemIntrinsicWithSize(Value *Size, + const TargetTransformInfo &TTI); + bool expandMemIntrinsicUses(Function &F) const; + bool lowerIntrinsics(Module &M) const; +}; + +} // namespace + static bool lowerLoadRelative(Function &F) { if (F.use_empty()) return false; @@ -133,12 +170,96 @@ return true; } -static bool lowerIntrinsics(Module &M) { +// TODO: Should refine based on estimated number of accesses (e.g. does it +// require splitting based on alignment) +bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize( + Value *Size, const TargetTransformInfo &TTI) { + ConstantInt *CI = dyn_cast<ConstantInt>(Size); + if (!CI) + return true; + int64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences() + ? 
MemIntrinsicExpandSizeThresholdOpt + : TTI.getMaxMemIntrinsicInlineSizeThreshold(); + return CI->getSExtValue() > Threshold; +} + +// TODO: Handle atomic memcpy and memcpy.inline +// TODO: Pass ScalarEvolution +bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { + Intrinsic::ID ID = F.getIntrinsicID(); + bool Changed = false; + + for (User *U : llvm::make_early_inc_range(F.users())) { + Instruction *Inst = cast<Instruction>(U); + + switch (ID) { + case Intrinsic::memcpy: { + auto *Memcpy = cast<MemCpyInst>(Inst); + Function *ParentFunc = Memcpy->getFunction(); + const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); + if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) { + if (UseMemIntrinsicLibFunc && + LookupLibInfo(*ParentFunc).has(LibFunc_memcpy)) + break; + + expandMemCpyAsLoop(Memcpy, TTI); + Changed = true; + Memcpy->eraseFromParent(); + } + + break; + } + case Intrinsic::memmove: { + auto *Memmove = cast<MemMoveInst>(Inst); + Function *ParentFunc = Memmove->getFunction(); + const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); + if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) { + if (UseMemIntrinsicLibFunc && + LookupLibInfo(*ParentFunc).has(LibFunc_memmove)) + break; + + expandMemMoveAsLoop(Memmove); + Changed = true; + Memmove->eraseFromParent(); + } + + break; + } + case Intrinsic::memset: { + auto *Memset = cast<MemSetInst>(Inst); + Function *ParentFunc = Memset->getFunction(); + const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); + if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) { + if (UseMemIntrinsicLibFunc && + LookupLibInfo(*ParentFunc).has(LibFunc_memset)) + break; + + expandMemSetAsLoop(Memset); + Changed = true; + Memset->eraseFromParent(); + } + + break; + } + default: + llvm_unreachable("unhandled intrinsic"); + } + } + + return Changed; +} + +bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { bool Changed = false; for (Function &F : M) { switch (F.getIntrinsicID()) { default: break; + case 
Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + Changed |= expandMemIntrinsicUses(F); + break; case Intrinsic::load_relative: Changed |= lowerLoadRelative(F); break; @@ -230,7 +351,23 @@ PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {} - bool runOnModule(Module &M) override { return lowerIntrinsics(M); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + + bool runOnModule(Module &M) override { + auto LookupTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + }; + + auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + + PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI); + return Lowering.lowerIntrinsics(M); + } }; } // end anonymous namespace @@ -247,7 +384,18 @@ PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, ModuleAnalysisManager &AM) { - if (!lowerIntrinsics(M)) + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + + auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + + auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI); + if (!Lowering.lowerIntrinsics(M)) return PreservedAnalyses::all(); else return PreservedAnalyses::none(); Index: llvm/lib/CodeGen/TargetPassConfig.cpp =================================================================== --- llvm/lib/CodeGen/TargetPassConfig.cpp +++ llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1088,8 +1088,8 @@ if (TM->useEmulatedTLS()) addPass(createLowerEmuTLSPass()); - addPass(createPreISelIntrinsicLoweringPass()); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + addPass(createPreISelIntrinsicLoweringPass()); addPass(createExpandLargeDivRemPass()); addPass(createExpandLargeFpConvertPass()); addIRPasses(); Index: llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp 
=================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -25,16 +25,6 @@ namespace { -static int MaxStaticSize; - -static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt( - "amdgpu-mem-intrinsic-expand-size", - cl::desc("Set minimum mem intrinsic size to expand in IR"), - cl::location(MaxStaticSize), - cl::init(1024), - cl::Hidden); - - class AMDGPULowerIntrinsics : public ModulePass { private: bool makeLIDRangeMetadata(Function &F) const; @@ -64,62 +54,6 @@ INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, false) -// TODO: Should refine based on estimated number of accesses (e.g. does it -// require splitting based on alignment) -static bool shouldExpandOperationWithSize(Value *Size) { - ConstantInt *CI = dyn_cast<ConstantInt>(Size); - return !CI || (CI->getSExtValue() > MaxStaticSize); -} - -bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) { - Intrinsic::ID ID = F.getIntrinsicID(); - bool Changed = false; - - for (User *U : llvm::make_early_inc_range(F.users())) { - Instruction *Inst = cast<Instruction>(U); - - switch (ID) { - case Intrinsic::memcpy: { - auto *Memcpy = cast<MemCpyInst>(Inst); - if (shouldExpandOperationWithSize(Memcpy->getLength())) { - Function *ParentFunc = Memcpy->getParent()->getParent(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc); - expandMemCpyAsLoop(Memcpy, TTI); - Changed = true; - Memcpy->eraseFromParent(); - } - - break; - } - case Intrinsic::memmove: { - auto *Memmove = cast<MemMoveInst>(Inst); - if (shouldExpandOperationWithSize(Memmove->getLength())) { - expandMemMoveAsLoop(Memmove); - Changed = true; - Memmove->eraseFromParent(); - } - - break; - } - case Intrinsic::memset: { - auto *Memset = cast<MemSetInst>(Inst); - if (shouldExpandOperationWithSize(Memset->getLength())) { - expandMemSetAsLoop(Memset); - Changed = true; - Memset->eraseFromParent(); - } - - break; - } - default: - break; - } - } - - return 
Changed; -} - bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const { auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); if (!TPC) @@ -148,13 +82,6 @@ continue; switch (F.getIntrinsicID()) { - case Intrinsic::memcpy: - case Intrinsic::memmove: - case Intrinsic::memset: - if (expandMemIntrinsicUses(F)) - Changed = true; - break; - case Intrinsic::r600_read_tidig_x: case Intrinsic::r600_read_tidig_y: case Intrinsic::r600_read_tidig_z: Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -55,6 +55,8 @@ void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); + + int64_t getMaxMemIntrinsicInlineSizeThreshold() const; }; class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { @@ -132,6 +134,8 @@ unsigned AddrSpace) const; bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; + + int64_t getMaxMemIntrinsicInlineSizeThreshold() const; Type *getMemcpyLoopLoweringType( LLVMContext & Context, Value * Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -267,6 +267,10 @@ BaseT::getPeelingPreferences(L, SE, PP); } +int64_t AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { + return 1024; +} + const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = { // Codegen control options which don't matter. 
AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler, @@ -395,6 +399,10 @@ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); } +int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { + return 1024; +} + // FIXME: Really we would like to issue multiple 128-bit loads and stores per // iteration. Should we report a larger size and let it legalize? // Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -210,6 +210,10 @@ InstructionCost getMemcpyCost(const Instruction *I); + int64_t getMaxMemIntrinsicInlineSizeThreshold() const { + return ST->getMaxInlineSizeThreshold(); + } + int getNumMemOps(const IntrinsicInst *I) const; InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, Index: llvm/lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.h +++ llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -273,6 +273,11 @@ const Function *Callee) const; bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef<Type *> &Type) const; + + int64_t getMaxMemIntrinsicInlineSizeThreshold() const { + return ST->getMaxInlineSizeThreshold(); + } + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool prefersVectorizedAddressing() const; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s -; RUN: llc 
-global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s declare void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs 
-amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s declare void @llvm.memmove.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1) Index: llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll +++ llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=8 %s | FileCheck -check-prefix=OPT8 %s -; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=4 %s | FileCheck -check-prefix=OPT4 %s -; RUN: opt -S 
-amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=0 %s | FileCheck -check-prefix=OPT0 %s -; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefix=OPT_NEG %s +; RUN: opt -S -mtriple=amdgcn-- -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=8 %s | FileCheck -check-prefix=OPT8 %s +; RUN: opt -S -mtriple=amdgcn-- -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=4 %s | FileCheck -check-prefix=OPT4 %s +; RUN: opt -S -mtriple=amdgcn-- -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=0 %s | FileCheck -check-prefix=OPT0 %s +; RUN: opt -S -mtriple=amdgcn-- -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefix=OPT_NEG %s -; Test the -amdgpu-mem-intrinsic-expand-size flag works. +; Test the -mem-intrinsic-expand-size flag works. ; Make sure we can always eliminate the intrinsic, even at 0. define amdgpu_kernel void @memset_size_0(ptr addrspace(1) %dst, i8 %val) { Index: llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefixes=OPT,ALL %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefixes=OPT,ALL %s declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1 
declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1