Index: llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
===================================================================
--- llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -579,7 +579,7 @@
   if (TM.useEmulatedTLS())
     addPass(LowerEmuTLSPass());

-  addPass(PreISelIntrinsicLoweringPass());
+  addPass(PreISelIntrinsicLoweringPass(TM));

   derived().addIRPasses(addPass);
   derived().addCodeGenPrepare(addPass);
Index: llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
+++ llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
@@ -18,9 +18,13 @@
 namespace llvm {

 class Module;
+class TargetMachine;

 struct PreISelIntrinsicLoweringPass
     : PassInfoMixin<PreISelIntrinsicLoweringPass> {
+  const TargetMachine &TM;
+
+  PreISelIntrinsicLoweringPass(const TargetMachine &TM) : TM(TM) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };

Index: llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
===================================================================
--- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -14,9 +14,10 @@
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
 #include "llvm/Analysis/ObjCARCInstKind.h"
 #include "llvm/Analysis/ObjCARCUtil.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -26,6 +27,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

 using namespace llvm;
@@ -41,19 +43,19 @@
 namespace {

 struct PreISelIntrinsicLowering {
+  const TargetMachine &TM;
   const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
-  const function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo;

   /// If this is true, assume it's preferably to leave memory intrinsic calls
   /// for replacement with a library call later. Otherwise this depends on
-  /// TargetLibraryInfo availability of the corresponding function.
+  /// TargetLoweringInfo availability of the corresponding function.
   const bool UseMemIntrinsicLibFunc;

   explicit PreISelIntrinsicLowering(
+      const TargetMachine &TM_,
       function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
-      function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo_,
       bool UseMemIntrinsicLibFunc_ = true)
-      : LookupTTI(LookupTTI_), LookupLibInfo(LookupLibInfo_),
+      : TM(TM_), LookupTTI(LookupTTI_),
         UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {}

   static bool shouldExpandMemIntrinsicWithSize(Value *Size,
@@ -195,9 +197,15 @@
   return SizeVal > Threshold || Threshold == 0;
 }

+static bool canEmitLibcall(const TargetLowering &TLI, RTLIB::Libcall LC) {
+  // TODO: Should this consider the address space of the memcpy?
+  return TLI.getLibcallName(LC) != nullptr;
+}
+
 // TODO: Handle atomic memcpy and memcpy.inline
 // TODO: Pass ScalarEvolution
 bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+  const TargetLowering *TLI = TM.getSubtargetImpl(F)->getTargetLowering();
   Intrinsic::ID ID = F.getIntrinsicID();
   bool Changed = false;

@@ -210,10 +218,10 @@
       Function *ParentFunc = Memcpy->getFunction();
       const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
       if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) {
-        if (UseMemIntrinsicLibFunc &&
-            LookupLibInfo(*ParentFunc).has(LibFunc_memcpy))
+        if (UseMemIntrinsicLibFunc && canEmitLibcall(*TLI, RTLIB::MEMCPY))
           break;

+        // TODO: For optsize, emit the loop into a separate function
         expandMemCpyAsLoop(Memcpy, TTI);
         Changed = true;
         Memcpy->eraseFromParent();
@@ -226,8 +234,7 @@
       Function *ParentFunc = Memmove->getFunction();
       const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
       if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) {
-        if (UseMemIntrinsicLibFunc &&
-            LookupLibInfo(*ParentFunc).has(LibFunc_memmove))
+        if (UseMemIntrinsicLibFunc && canEmitLibcall(*TLI, RTLIB::MEMMOVE))
           break;

         if (expandMemMoveAsLoop(Memmove, TTI)) {
@@ -243,8 +250,7 @@
       Function *ParentFunc = Memset->getFunction();
       const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
       if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) {
-        if (UseMemIntrinsicLibFunc &&
-            LookupLibInfo(*Memset->getFunction()).has(LibFunc_memset))
+        if (UseMemIntrinsicLibFunc && canEmitLibcall(*TLI, RTLIB::MEMSET))
           break;

         expandMemSetAsLoop(Memset);
@@ -365,8 +371,8 @@
   PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}

   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
   }

   bool runOnModule(Module &M) override {
@@ -374,11 +380,8 @@
       return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     };

-    auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
-      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-    };
-
-    PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+    const auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+    PreISelIntrinsicLowering Lowering(TM, LookupTTI);
     return Lowering.lowerIntrinsics(M);
   }
 };
@@ -387,27 +390,28 @@
 char PreISelIntrinsicLoweringLegacyPass::ID;

-INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass,
-                "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering",
-                false, false)
+INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass,
+                      "pre-isel-intrinsic-lowering",
+                      "Pre-ISel Intrinsic Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(PreISelIntrinsicLoweringLegacyPass,
+                    "pre-isel-intrinsic-lowering",
+                    "Pre-ISel Intrinsic Lowering", false, false)

 ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
-  return new PreISelIntrinsicLoweringLegacyPass;
+  return new PreISelIntrinsicLoweringLegacyPass();
 }

 PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
                                                     ModuleAnalysisManager &AM) {
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

-  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
-
   auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
     return FAM.getResult<TargetIRAnalysis>(F);
   };

-  PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+  PreISelIntrinsicLowering Lowering(TM, LookupTTI);
   if (!Lowering.lowerIntrinsics(M))
     return PreservedAnalyses::all();
   else
Index: llvm/test/CodeGen/ARM/no-expand-memcpy-no-builtins.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/no-expand-memcpy-no-builtins.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7em-apple-unknown-macho < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0
+declare void @llvm.memmove.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0
+declare void @llvm.memset.p0.i32(ptr noalias nocapture writeonly, i8, i32, i1 immarg) #0
+
+; Check we don't expand memcpy to a loop when the caller
+; has no-builtins attached.
+
+; CHECK: bl _memcpy
+define arm_aapcs_vfpcc void @test_memcpy(ptr %p1, ptr %p2) #1 {
+  call void @llvm.memcpy.p0.p0.i32(ptr %p1, ptr %p2, i32 128, i1 false)
+  ret void
+}
+
+; CHECK: bl _memmove
+define arm_aapcs_vfpcc void @test_memmove(ptr %p1, ptr %p2) #1 {
+  call void @llvm.memmove.p0.p0.i32(ptr %p1, ptr %p2, i32 128, i1 false)
+  ret void
+}
+
+; CHECK: bl _memset
+define arm_aapcs_vfpcc void @test_memset(ptr %p1) #1 {
+  call void @llvm.memset.p0.i32(ptr %p1, i8 0, i32 128, i1 false)
+  ret void
+}
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+attributes #1 = { "no-builtins" }
Index: llvm/test/Transforms/PreISelIntrinsicLowering/X86/lit.local.cfg
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PreISelIntrinsicLowering/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "X86" in config.root.targets:
+    config.unsupported = True
Index: llvm/test/Transforms/PreISelIntrinsicLowering/X86/load-relative.ll
===================================================================
--- llvm/test/Transforms/PreISelIntrinsicLowering/X86/load-relative.ll
+++ llvm/test/Transforms/PreISelIntrinsicLowering/X86/load-relative.ll
@@ -1,4 +1,4 @@
-; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-pc-linux-gnu -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s

 ; CHECK: define ptr @foo32(ptr [[P:%.*]], i32 [[O:%.*]])
 define ptr @foo32(ptr %p, i32 %o) {
Index: llvm/test/Transforms/PreISelIntrinsicLowering/X86/objc-arc.ll
===================================================================
--- llvm/test/Transforms/PreISelIntrinsicLowering/X86/objc-arc.ll
+++ llvm/test/Transforms/PreISelIntrinsicLowering/X86/objc-arc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-pc-linux-gnu -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s

 ; Make sure calls to the objc intrinsics are translated to calls in to the
 ; runtime