Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -4916,6 +4916,24 @@ ... !0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub} +'``speculation.marker``' Metadata +^^^^^^^^^^^^^^^^^^^^^^ + +``speculation.marker`` metadata must be attached to a load. It consist of +set of ``i64`` type offsets indicating that the memory from load pointer +address is proven to be dereferanceable for read operation with provided +offsets. The intent of this metadata is to keep track of dereferanceable +memory locations assosiated with load operations to that memory were +deleted, as it might be beneficial for future optimizations. Offsets aren't +required to be sorted. + +.. code-block:: llvm + + %ld1 = load double, double* %arrayidx1, align 8, !speculation.marker !0 + + ... + !0 = !{i64 -1, i64 2} + '``unpredictable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: include/llvm/IR/LLVMContext.h =================================================================== --- include/llvm/IR/LLVMContext.h +++ include/llvm/IR/LLVMContext.h @@ -102,6 +102,7 @@ MD_associated = 22, // "associated" MD_callees = 23, // "callees" MD_irr_loop = 24, // "irr_loop" + MD_speculation_marker = 25, // "speculation.marker" }; /// Known operand bundle tag IDs, which always have the same value. All Index: lib/IR/LLVMContext.cpp =================================================================== --- lib/IR/LLVMContext.cpp +++ lib/IR/LLVMContext.cpp @@ -61,6 +61,7 @@ {MD_associated, "associated"}, {MD_callees, "callees"}, {MD_irr_loop, "irr_loop"}, + {MD_speculation_marker, "speculation.marker"}, }; for (auto &MDKind : MDKinds) { Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -212,6 +212,9 @@ } } +typedef DenseMap> BasePtrInfoTy; +typedef DenseMap> PtrInfoTy; + /// \brief The core instruction combiner logic. /// /// This class provides both the logic to recursively visit instructions and @@ -248,6 +251,8 @@ // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. LoopInfo *LI; + BasePtrInfoTy &Bases; + PtrInfoTy &Ptrs; bool MadeIRChange = false; @@ -256,10 +261,11 @@ bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, const DataLayout &DL, - LoopInfo *LI) + LoopInfo *LI, BasePtrInfoTy &Bases, PtrInfoTy &Ptrs) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI), + Bases(Bases), Ptrs(Ptrs) {} /// \brief Run the combiner over the entire worklist until it is empty. /// Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -81,6 +81,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Pass.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" @@ -1498,6 +1499,28 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); + // Here we have to avoid any GEPs with vector types or with + // number of operands that is not equal to two or GEPs + // in a loop, except it's origin is in the same basic block. + if (!dyn_cast(Ops[0]->getType()) && GEP.getNumOperands() == 2 && + LI != nullptr) { + bool ValidGEP = (LI->getLoopDepth(GEP.getParent()) == 0); + Value *Ptr = cast(&GEP); + if (!ValidGEP) + if (Instruction *PtrInst = dyn_cast(Ptr)) + ValidGEP = PtrInst->getParent() == GEP.getParent(); + ConstantInt *C = dyn_cast(GEP.getOperand(1)); + if (ValidGEP && C != nullptr) { + Value *BasePtr = GEP.getOperand(0); + Ptrs[Ptr].first = BasePtr; + Ptrs[Ptr].second = C->getZExtValue(); + if (Ptrs.find(BasePtr) == Ptrs.end()) { + Ptrs[BasePtr].first = BasePtr; + Ptrs[BasePtr].second = 0; + } + } + } + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); @@ -2933,6 +2956,14 @@ // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, &TLI)) { DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); + if (InsertElementInst *Insert = dyn_cast(I)) + if (LoadInst *Load = dyn_cast(Insert->getOperand(1))) { + Value *Ptr = Load->getOperand(0); + if (Ptrs.find(Ptr) != Ptrs.end()) { + Value *BasePtr = Ptrs[Ptr].first; + Bases[BasePtr].insert(Ptrs[Ptr].second); + } + } eraseInstFromFunction(*I); ++NumDeadInst; MadeIRChange = true; @@ -3091,7 +3122,9 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, SmallPtrSetImpl &Visited, InstCombineWorklist &ICWorklist, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + BasePtrInfoTy &Bases, + PtrInfoTy &Ptrs) { bool MadeIRChange = false; SmallVector Worklist; Worklist.push_back(BB); @@ -3113,6 +3146,14 @@ if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); + if (InsertElementInst *Insert = dyn_cast(Inst)) + if (LoadInst *Load = dyn_cast(Insert->getOperand(1))) { + Value *Ptr = Load->getOperand(0); + if (Ptrs.find(Ptr) != Ptrs.end()) { + Value *BasePtr = Ptrs[Ptr].first; + Bases[BasePtr].insert(Ptrs[Ptr].second); + } + } salvageDebugInfo(*Inst); Inst->eraseFromParent(); MadeIRChange = true; @@ -3198,7 +3239,9 @@ /// the combiner itself run much faster. static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, - InstCombineWorklist &ICWorklist) { + InstCombineWorklist &ICWorklist, + BasePtrInfoTy &Bases, + PtrInfoTy &Ptrs) { bool MadeIRChange = false; // Do a depth-first traversal of the function, populate the worklist with @@ -3206,7 +3249,8 @@ // track of which blocks we visit. SmallPtrSet Visited; MadeIRChange |= - AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI); + AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI, + Bases, Ptrs); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents @@ -3241,6 +3285,9 @@ AC.registerAssumption(cast(I)); })); + BasePtrInfoTy Bases; + PtrInfoTy Ptrs; + // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. bool MadeIRChange = false; @@ -3254,16 +3301,51 @@ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); + MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist, Bases, + Ptrs); InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, - AC, TLI, DT, ORE, DL, LI); + AC, TLI, DT, ORE, DL, LI, Bases, Ptrs); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) break; } + bool SpeculativeLoad = false; + for (auto x : Bases) + if (!x.second.empty()) + SpeculativeLoad = true; + + if (SpeculativeLoad) + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I++) + if (LoadInst *Load = dyn_cast(*&I)) { + Value *Ptr = Load->getOperand(0); + if (Ptrs.find(Ptr) != Ptrs.end()) { + SmallVector Vals; + SmallDenseSet &Set = Bases[Ptrs[Ptr].first]; + int64_t LoadOffset = Ptrs[Ptr].second; + MDBuilder MDB(Load->getContext()); + if (MDNode *MD = Load->getMetadata(LLVMContext::MD_speculation_marker)) + for (int i = 0, e = MD->getNumOperands(); i < e; i++) { + ConstantInt *C = + mdconst::dyn_extract(MD->getOperand(i)); + int64_t Diff = LoadOffset + (int64_t)C->getSExtValue(); + if (Set.count(Diff) == 0) + Set.insert(Diff); + } + for (auto Off : Set) { + int64_t Offset = (int64_t)Off; + Constant *C = ConstantInt::get(Load->getContext(), + APInt(64, Offset - LoadOffset)); + Vals.push_back(MDB.createConstant(C)); + } + Load->setMetadata(LLVMContext::MD_speculation_marker, + MDNode::get(Load->getContext(), Vals)); + } + } + return MadeIRChange || Iteration > 1; } Index: test/ThinLTO/X86/lazyload_metadata.ll =================================================================== --- test/ThinLTO/X86/lazyload_metadata.ll +++ test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 55 bitcode-reader - Number of Metadata records loaded +; LAZY: 57 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 64 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 66 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded Index: test/Transforms/InstCombine/speculation_marker.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/speculation_marker.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define <4 x double> @foo(double* %ptr) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 2 +; CHECK-NEXT: [[LD0:%.*]] = load double, double* [[PTR]], align 8, !speculation.marker !0 +; CHECK-NEXT: [[LD2:%.*]] = load double, double* [[ARRAYIDX2]], align 8, !speculation.marker !1 +; CHECK-NEXT: [[INS0:%.*]] = insertelement <4 x double> undef, double [[LD0]], i32 0 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x double> [[INS0]], double [[LD2]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[INS2]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[SHUFFLE]] +; + %arrayidx0 = getelementptr inbounds double, double* %ptr, i64 0 + %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1 + %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2 + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + + %ld0 = load double, double* %arrayidx0 + %ld1 = load double, double* %arrayidx1 + %ld2 = load double, double* %arrayidx2 + %ld3 = load double, double* %arrayidx3 + + %ins0 = insertelement <4 x double> undef, double %ld0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double %ld1, i32 1 + %ins2 = insertelement <4 x double> %ins1, double %ld2, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %ld3, i32 3 + + %shuffle = shufflevector <4 x double> %ins3, <4 x double> undef, <4 x i32> + ret <4 x double> %shuffle +} + +define <4 x double> @bar(double* %ptr) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 2 +; CHECK-NEXT: [[LD1:%.*]] = load double, double* [[ARRAYIDX1]], align 8, !speculation.marker !2 +; CHECK-NEXT: [[LD2:%.*]] = load double, double* [[ARRAYIDX2]], align 8, !speculation.marker !3 +; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x double> undef, double [[LD1]], i32 1 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x double> [[INS1]], double [[LD2]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[INS2]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[SHUFFLE]] +; + %arrayidx0 = getelementptr inbounds double, double* %ptr, i64 0 + %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1 + %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2 + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + + %ld0 = load double, double* %arrayidx0 + %ld1 = load double, double* %arrayidx1 + %ld2 = load double, double* %arrayidx2 + %ld3 = load double, double* %arrayidx3 + + %ins0 = insertelement <4 x double> undef, double %ld0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double %ld1, i32 1 + %ins2 = insertelement <4 x double> %ins1, double %ld2, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %ld3, i32 3 + + %shuffle = shufflevector <4 x double> %ins3, <4 x double> undef, <4 x i32> + ret <4 x double> %shuffle +} + +; CHECK: !0 = !{i64 1, i64 3} +; CHECK: !1 = !{i64 -1, i64 1} +; CHECK: !2 = !{i64 -1, i64 2} +; CHECK: !3 = !{i64 -2, i64 1}