Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -234,6 +234,7 @@
 void initializeLoopVersioningLICMPass(PassRegistry&);
 void initializeLoopVersioningPassPass(PassRegistry&);
 void initializeLowerAtomicLegacyPassPass(PassRegistry&);
+void initializeLowerBestEffortIntrinsicsLegacyPassPass(PassRegistry&);
 void initializeLowerEmuTLSPass(PassRegistry&);
 void initializeLowerExpectIntrinsicPass(PassRegistry&);
 void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -138,6 +138,7 @@
       (void) llvm::createLoopVersioningLICMPass();
       (void) llvm::createLoopIdiomPass();
       (void) llvm::createLoopRotatePass();
+      (void) llvm::createLowerBestEffortIntrinsicsPass();
       (void) llvm::createLowerExpectIntrinsicPass();
       (void) llvm::createLowerInvokePass();
       (void) llvm::createLowerSwitchPass();
Index: include/llvm/Passes/PassBuilder.h
===================================================================
--- include/llvm/Passes/PassBuilder.h
+++ include/llvm/Passes/PassBuilder.h
@@ -274,8 +274,10 @@
   /// only intended for use when attempting to optimize code. If frontends
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
-  ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
-                                                    bool DebugLogging = false);
+  ModulePassManager
+  buildModuleOptimizationPipeline(OptimizationLevel Level,
+                                  bool DebugLogging = false,
+                                  bool ForceLowerIntrinsics = true);

   /// Build a per-module default optimization pipeline.
   ///
@@ -288,8 +290,10 @@
   /// only intended for use when attempting to optimize code. If frontends
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
-  ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                                  bool DebugLogging = false);
+  ModulePassManager
+  buildPerModuleDefaultPipeline(OptimizationLevel Level,
+                                bool DebugLogging = false,
+                                bool ForceLowerIntrinsics = true);

   /// Build a pre-link, ThinLTO-targeting default optimization pipeline to
   /// a pass manager.
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -378,6 +378,12 @@
 FunctionPass *createInferAddressSpacesPass();
 extern char &InferAddressSpacesID;

+//===----------------------------------------------------------------------===//
+//
+// LowerBestEffortIntrinsics - Forces all best-effort intrinsics, like
+// llvm.objectsize, to be lowered immediately.
+FunctionPass *createLowerBestEffortIntrinsicsPass();
+
 //===----------------------------------------------------------------------===//
 //
 // LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates
Index: include/llvm/Transforms/Scalar/LowerBestEffortIntrinsics.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Scalar/LowerBestEffortIntrinsics.h
@@ -0,0 +1,32 @@
+//===- LowerBestEffortIntrinsics.h - LowerBestEffortIntrinsics --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The header file for the LowerBestEffortIntrinsicsPass pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERBESTEFFORTINTRINSICS_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERBESTEFFORTINTRINSICS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct LowerBestEffortIntrinsicsPass
+    : PassInfoMixin<LowerBestEffortIntrinsicsPass> {
+  /// \brief Run the pass over the function.
+  ///
+  /// This will lower all of the best-effort llvm.objectsize intrinsics in the
+  /// given function.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+} // namespace llvm
+
+#endif
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1687,24 +1687,23 @@
   switch (II->getIntrinsicID()) {
   default: break;
   case Intrinsic::objectsize: {
-    // Lower all uses of llvm.objectsize.*
     ConstantInt *RetVal =
-        lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
-    // Substituting this can cause recursive simplifications, which can
-    // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
-    // this
-    // happens.
-    Value *CurValue = &*CurInstIterator;
-    WeakTrackingVH IterHandle(CurValue);
+        lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);

-    replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
-
-    // If the iterator instruction was recursively deleted, start over at the
-    // start of the block.
-    if (IterHandle != CurValue) {
-      CurInstIterator = BB->begin();
-      SunkAddrs.clear();
-    }
+    // In a regular optimization pipeline, we have a pass to forcibly lower
+    // this intrinsic, so we have a chance to simplify code that depends on
+    // its result.
+    //
+    // Hence, if we've reached this point, either:
+    // - the optimization pipeline is misconfigured,
+    // - we're just running a small set of manually-specified passes on the
+    //   IR, or
+    // - there's a bug elsewhere.
+    //
+    // In any case, there's probably not much reason to try to simplify users
+    // of `RetVal`.
+    II->replaceAllUsesWith(RetVal);
+    II->eraseFromParent();
     return true;
   }
   case Intrinsic::aarch64_stlxr:
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -128,6 +128,7 @@
 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerBestEffortIntrinsics.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
@@ -731,7 +732,8 @@

 ModulePassManager
 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
-                                             bool DebugLogging) {
+                                             bool DebugLogging,
+                                             bool ForceLowerIntrinsics) {
   ModulePassManager MPM(DebugLogging);

   // Optimize globals now that the module is fully simplified.
@@ -772,6 +774,9 @@
   // FIXME: We need to run some loop optimizations to re-rotate loops after
   // simplify-cfg and others undo their rotation.

+  if (ForceLowerIntrinsics)
+    OptimizePM.addPass(LowerBestEffortIntrinsicsPass());
+
   // Optimize the loop execution. These passes operate on entire loop nests
   // rather than on each loop in an inside-out manner, and so they are actually
   // function passes.
@@ -879,7 +884,8 @@

 ModulePassManager
 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                           bool DebugLogging) {
+                                           bool DebugLogging,
+                                           bool ForceLowerIntrinsics) {
   assert(Level != O0 && "Must request optimizations for the default pipeline!");
   ModulePassManager MPM(DebugLogging);

@@ -991,8 +997,12 @@
 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                             bool DebugLogging) {
   assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  // Skip force-lowering best-effort intrinsics until we have a global view of
+  // the program.
+  //
   // FIXME: We should use a customized pre-link pipeline!
-  return buildPerModuleDefaultPipeline(Level, DebugLogging);
+  return buildPerModuleDefaultPipeline(Level, DebugLogging,
+                                       /*ForceLowerIntrinsics=*/false);
 }

 ModulePassManager
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -171,6 +171,7 @@
 FUNCTION_PASS("no-op-function", NoOpFunctionPass())
 FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
 FUNCTION_PASS("loweratomic", LowerAtomicPass())
+FUNCTION_PASS("lower-best-effort-intrinsics", LowerBestEffortIntrinsicsPass())
 FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
 FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -624,6 +624,14 @@

   MPM.add(createFloat2IntPass());

+  // If we're in the pre-link stage for LTO, we want to avoid lowering these
+  // intrinsics for now: we'll have a global view later, which may give us
+  // higher-quality answers.
+  //
+  // PrepareForThinLTO gave up above, so it's not a problem.
+  if (!PrepareForLTO)
+    MPM.add(createLowerBestEffortIntrinsicsPass());
+
   addExtensionsToPM(EP_VectorizerStart, MPM);

   // Re-rotate loops in all our loop nests. These may have fallout out of
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -43,6 +43,7 @@
   LoopUnswitch.cpp
   LoopVersioningLICM.cpp
   LowerAtomic.cpp
+  LowerBestEffortIntrinsics.cpp
   LowerExpectIntrinsic.cpp
   LowerGuardIntrinsic.cpp
   MemCpyOptimizer.cpp
Index: lib/Transforms/Scalar/LowerBestEffortIntrinsics.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/LowerBestEffortIntrinsics.cpp
@@ -0,0 +1,105 @@
+//===- LowerBestEffortIntrinsics.cpp - Lower best-effort intrinsics -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forcibly lowers 'best-effort' intrinsics that generally require
+// some amount of optimization to be run in order to produce high-quality
+// results. Namely, llvm.objectsize and (coming soon) llvm.is.constant.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerBestEffortIntrinsics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static bool runOnFunction(Function &F, const TargetLibraryInfo *TLI) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  bool Changed = false;
+  for (BasicBlock &BB : F)
+    for (auto Iter = BB.begin(); Iter != BB.end();) {
+      Instruction *I = &*Iter;
+      // We may erase `I`; don't hang on to the iterator.
+      ++Iter;
+
+      auto *II = dyn_cast<IntrinsicInst>(I);
+      if (!II || II->getIntrinsicID() != llvm::Intrinsic::objectsize)
+        continue;
+
+      ConstantInt *NewVal =
+          lowerObjectSizeCall(II, DL, TLI, /*MustSucceed=*/true);
+      II->replaceAllUsesWith(NewVal);
+      II->eraseFromParent();
+      Changed = true;
+    }
+
+  return Changed;
+}
+
+PreservedAnalyses
+LowerBestEffortIntrinsicsPass::run(Function &F, FunctionAnalysisManager &FAM) {
+  const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
+  if (!::runOnFunction(F, TLI))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  PA.preserve<TargetLibraryAnalysis>();
+  return PA;
+}
+
+namespace {
+/// Legacy pass for lowering best-effort intrinsics out of the IR.
+struct LowerBestEffortIntrinsicsLegacyPass : FunctionPass {
+  static char ID;
+  LowerBestEffortIntrinsicsLegacyPass() : FunctionPass(ID) {
+    initializeLowerBestEffortIntrinsicsLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.setPreservesCFG();
+  }
+
+  bool runOnFunction(Function &F) override {
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    return ::runOnFunction(F, TLI);
+  }
+};
+} // namespace
+
+char LowerBestEffortIntrinsicsLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerBestEffortIntrinsicsLegacyPass,
+                "lower-best-effort-intrinsics", "Lower Best-Effort Intrinsics",
+                false, false)
+
+FunctionPass *llvm::createLowerBestEffortIntrinsicsPass() {
+  return new LowerBestEffortIntrinsicsLegacyPass();
+}
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -76,6 +76,7 @@
   initializeLoopVersioningLICMPass(Registry);
   initializeLoopIdiomRecognizeLegacyPassPass(Registry);
   initializeLowerAtomicLegacyPassPass(Registry);
+  initializeLowerBestEffortIntrinsicsLegacyPassPass(Registry);
   initializeLowerExpectIntrinsicPass(Registry);
   initializeLowerGuardIntrinsicLegacyPassPass(Registry);
   initializeMemCpyOptLegacyPassPass(Registry);
Index: test/Other/new-pm-defaults.ll
===================================================================
--- test/Other/new-pm-defaults.ll
+++ test/Other/new-pm-defaults.ll
@@ -223,6 +223,7 @@
 ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: Float2IntPass
+; CHECK-O-NEXT: Running pass: LowerBestEffortIntrinsicsPass
 ; CHECK-EP-VECTORIZER-START-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
Index: test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- test/Other/new-pm-thinlto-defaults.ll
+++ test/Other/new-pm-thinlto-defaults.ll
@@ -207,6 +207,7 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-POSTLINK-O-NEXT: Running pass: Float2IntPass
+; CHECK-POSTLINK-O-NEXT: Running pass: LowerBestEffortIntrinsicsPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass
 ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass
Index: test/Other/opt-O2-pipeline.ll
===================================================================
--- test/Other/opt-O2-pipeline.ll
+++ test/Other/opt-O2-pipeline.ll
@@ -184,6 +184,7 @@
 ; CHECK-NEXT:     Globals Alias Analysis
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Float to int
+; CHECK-NEXT:     Lower Best-Effort Intrinsics
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Natural Loop Information
 ; CHECK-NEXT:     Canonicalize natural loops
Index: test/Other/opt-O3-pipeline.ll
===================================================================
--- test/Other/opt-O3-pipeline.ll
+++ test/Other/opt-O3-pipeline.ll
@@ -188,6 +188,7 @@
 ; CHECK-NEXT:     Globals Alias Analysis
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Float to int
+; CHECK-NEXT:     Lower Best-Effort Intrinsics
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Natural Loop Information
 ; CHECK-NEXT:     Canonicalize natural loops
Index: test/Other/opt-Os-pipeline.ll
===================================================================
--- test/Other/opt-Os-pipeline.ll
+++ test/Other/opt-Os-pipeline.ll
@@ -171,6 +171,7 @@
 ; CHECK-NEXT:     Globals Alias Analysis
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Float to int
+; CHECK-NEXT:     Lower Best-Effort Intrinsics
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Natural Loop Information
 ; CHECK-NEXT:     Canonicalize natural loops
Index: test/Other/opt-hot-cold-split.ll
===================================================================
--- test/Other/opt-hot-cold-split.ll
+++ test/Other/opt-hot-cold-split.ll
@@ -170,6 +170,7 @@
 ; CHECK-NEXT:     Globals Alias Analysis
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Float to int
+; CHECK-NEXT:     Lower Best-Effort Intrinsics
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Natural Loop Information
 ; CHECK-NEXT:     Canonicalize natural loops
Index: test/Transforms/LowerBestEffortIntrinsics/objectsize.ll
===================================================================
--- /dev/null
+++ test/Transforms/LowerBestEffortIntrinsics/objectsize.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -lower-best-effort-intrinsics < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-best-effort-intrinsics)' < %s | FileCheck %s
+
+; CHECK-LABEL: define i32 @unresolvable
+define i32 @unresolvable(i8* %p) {
+  ; CHECK: ret i32 -1
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %p, i1 false, i1 true)
+  ret i32 %1
+}
+
+; CHECK-LABEL: define i32 @trivially_resolvable
+define i32 @trivially_resolvable() {
+  ; CHECK: ret i32 1
+  %p = alloca i8
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %p, i1 false, i1 true)
+  ret i32 %1
+}
+
+; CHECK-LABEL: define i1 @only_objectsize
+define i1 @only_objectsize(i1 %x) {
+  ; CHECK: call i1 @llvm.expect
+  %1 = call i1 @llvm.expect.i1(i1 %x, i1 0)
+  ret i1 %1
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) nounwind readnone
+declare i1 @llvm.expect.i1(i1, i1) nounwind readnone
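
Usage note (illustrative sketch, not part of the diff): the snippet below shows how a pre-link-style driver might consume the ForceLowerIntrinsics parameter this patch adds to buildPerModuleDefaultPipeline, keeping llvm.objectsize un-lowered until a whole-program view exists, the same trade-off buildLTOPreLinkDefaultPipeline makes above. The helper name runPreLinkStylePipeline and the choice of O2 are assumptions for the example only.

// Sketch only; assumes this patch is applied. runPreLinkStylePipeline is a
// hypothetical helper, not something added by the patch.
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

static void runPreLinkStylePipeline(Module &M) {
  PassBuilder PB;

  // Standard new-PM analysis manager setup.
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // ForceLowerIntrinsics=false skips the LowerBestEffortIntrinsicsPass that
  // the optimization pipeline would otherwise add, so llvm.objectsize survives
  // for a later whole-program run; the default (true) keeps the regular
  // per-module behavior.
  ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(
      PassBuilder::O2, /*DebugLogging=*/false, /*ForceLowerIntrinsics=*/false);
  MPM.run(M, MAM);
}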