Index: llvm/include/llvm/Analysis/MemoryBuiltins.h
===================================================================
--- llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -180,6 +180,17 @@
   return const_cast<CallInst *>(isFreeCall((const Value*)I, TLI));
 }
 
+//===----------------------------------------------------------------------===//
+//  Utility functions to reason about whether memory might be deallocated
+//
+
+/// Return false iff we can prove that the object associated with the pointer
+/// 'V' cannot be deallocated within a single execution of the function
+/// 'Scope'.  Note that callers need to be careful when reasoning about
+/// potentially recursive functions: there may be many live copies of V and
+/// Scope, and the result describes only a single dynamic frame of Scope.
+bool mayObjectBeDeallocatedInScope(const Value *V, const Function *Scope);
+
 //===----------------------------------------------------------------------===//
 //  Utility functions to compute size of objects.
 //
Index: llvm/include/llvm/Analysis/ValueTracking.h
===================================================================
--- llvm/include/llvm/Analysis/ValueTracking.h
+++ llvm/include/llvm/Analysis/ValueTracking.h
@@ -460,7 +460,8 @@
 /// for such instructions, moving them may change the resulting value.
 bool isSafeToSpeculativelyExecute(const Value *V,
                                   const Instruction *CtxI = nullptr,
-                                  const DominatorTree *DT = nullptr);
+                                  const DominatorTree *DT = nullptr,
+                                  const TargetLibraryInfo *TLI = nullptr);
 
 /// Returns true if the result or effects of the given instructions \p I
 /// depend on or influence global memory.
Index: llvm/lib/Analysis/Loads.cpp
===================================================================
--- llvm/lib/Analysis/Loads.cpp
+++ llvm/lib/Analysis/Loads.cpp
@@ -133,19 +133,11 @@
   Opts.RoundToAlign = false;
   Opts.NullIsUnknownSize = true;
   uint64_t ObjSize;
-  // TODO: Plumb through TLI so that malloc routines and such working.
-  if (getObjectSize(V, ObjSize, DL, nullptr, Opts)) {
+  if (getObjectSize(V, ObjSize, DL, TLI, Opts)) {
     APInt KnownDerefBytes(Size.getBitWidth(), ObjSize);
     if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
         isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) &&
-        // TODO: We're currently inconsistent about whether deref(N) is a
-        // global fact or a point in time fact. Once D61652 eventually
-        // lands, this check will be restricted to the point in time
-        // variant. For that variant, we need to prove that object hasn't
-        // been conditionally freed before ontext instruction - if it has, we
-        // might be hoisting over the inverse conditional and creating a
-        // dynamic use after free.
-        !PointerMayBeCapturedBefore(V, true, true, CtxI, DT, true)) {
+        !mayObjectBeDeallocatedInScope(V, CtxI->getFunction())) {
       // As we recursed through GEPs to get here, we've incrementally
       // checked that each step advanced by a multiple of the alignment. If
       // our base is properly aligned, then the original offset accessed
Index: llvm/lib/Analysis/MemoryBuiltins.cpp
===================================================================
--- llvm/lib/Analysis/MemoryBuiltins.cpp
+++ llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -498,6 +498,33 @@
 }
 
 
+bool llvm::mayObjectBeDeallocatedInScope(const Value *V,
+                                         const Function *Scope) {
+  // Global variables are never deallocated.
+  if (isa<GlobalVariable>(V))
+    return false;
+
+  // Constants aren't allocated per se, thus they're never deallocated either.
+  if (isa<Constant>(V))
+    return false;
+
+  // An alloca in the current function is deallocated at the end of the
+  // function's scope.  An alloca in another function might belong to a callee.
+  if (auto *AI = dyn_cast<AllocaInst>(V))
+    return AI->getFunction() != Scope;
+
+  // TODO: We can most likely add the following cases, subject to individual
+  // review and discussion to make sure we're not missing anything.
+  // * function w/nofree and nosync attributes
+  // * noalias + nofree argument => no-free object
+  //   (separately need either nosync or not captured)
+  // * GC allocated objects (before lowering from abstract machine)
+  // * others?
+
+  // Conservative answer.
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 //  Utility functions to compute size of objects.
 //
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -4359,7 +4359,8 @@
 
 bool llvm::isSafeToSpeculativelyExecute(const Value *V,
                                         const Instruction *CtxI,
-                                        const DominatorTree *DT) {
+                                        const DominatorTree *DT,
+                                        const TargetLibraryInfo *TLI) {
   const Operator *Inst = dyn_cast<Operator>(V);
   if (!Inst)
     return false;
@@ -4406,7 +4407,7 @@
     const DataLayout &DL = LI->getModule()->getDataLayout();
     return isDereferenceableAndAlignedPointer(
         LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()),
-        DL, CtxI, DT);
+        DL, CtxI, DT, TLI);
   }
   case Instruction::Call: {
     auto *CI = cast<CallInst>(Inst);
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -162,6 +162,7 @@
                                   OptimizationRemarkEmitter *ORE);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                            const DominatorTree *DT,
+                                           const TargetLibraryInfo *TLI,
                                            const Loop *CurLoop,
                                            const LoopSafetyInfo *SafetyInfo,
                                            OptimizationRemarkEmitter *ORE,
@@ -885,7 +886,7 @@
             ORE) &&
         worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
         isSafeToExecuteUnconditionally(
-            I, DT, CurLoop, SafetyInfo, ORE,
+            I, DT, TLI, CurLoop, SafetyInfo, ORE,
             CurLoop->getLoopPreheader()->getTerminator())) {
       hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
             MSSAU, SE, ORE);
@@ -1780,11 +1781,12 @@
 /// or if it is a trapping instruction and is guaranteed to execute.
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                            const DominatorTree *DT,
+                                           const TargetLibraryInfo *TLI,
                                            const Loop *CurLoop,
                                            const LoopSafetyInfo *SafetyInfo,
                                            OptimizationRemarkEmitter *ORE,
                                            const Instruction *CtxI) {
-  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
+  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
     return true;
 
   bool GuaranteedToExecute =
@@ -2056,8 +2058,9 @@
         // to execute does as well. Thus we can increase our guaranteed
        // alignment as well.
        if (!DereferenceableInPH || (InstAlignment > Alignment))
-          if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
-                                             ORE, Preheader->getTerminator())) {
+          if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
+                                             SafetyInfo, ORE,
+                                             Preheader->getTerminator())) {
            DereferenceableInPH = true;
            Alignment = std::max(Alignment, InstAlignment);
          }
@@ -2104,7 +2107,7 @@
       if (!DereferenceableInPH) {
         DereferenceableInPH = isDereferenceableAndAlignedPointer(
             Store->getPointerOperand(), Store->getValueOperand()->getType(),
-            Store->getAlign(), MDL, Preheader->getTerminator(), DT);
+            Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
       }
     } else
       return false; // Not a load or store.
Index: llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2299,24 +2299,24 @@
 ; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
 ; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
 ; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
 ; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
 ; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
 ; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
@@ -2467,24 +2467,24 @@
 ; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0
 ; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4
 ; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8
 ; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12
 ; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
 ; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
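
For reviewers, a minimal sketch of how the new query is intended to compose with getObjectSize once TLI is plumbed through, mirroring the Loads.cpp change above. The wrapper name isLoadSafeToSpeculate and its parameter list are hypothetical and illustrative only; they are not part of this patch.

  // Illustrative sketch only; isLoadSafeToSpeculate is a hypothetical wrapper.
  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Value.h"
  #include <cstdint>
  using namespace llvm;

  static bool isLoadSafeToSpeculate(const Value *Ptr, uint64_t AccessSize,
                                    const DataLayout &DL,
                                    const TargetLibraryInfo *TLI,
                                    const Instruction *CtxI) {
    ObjectSizeOpts Opts;
    Opts.RoundToAlign = false;
    Opts.NullIsUnknownSize = true;
    uint64_t ObjSize;
    // With TLI available, getObjectSize also understands malloc-like
    // routines, not just allocas and globals.
    if (!getObjectSize(Ptr, ObjSize, DL, TLI, Opts) || ObjSize < AccessSize)
      return false;
    // Object size is a point-in-time fact: if the object could be freed
    // within this function, speculating the load past the free (or past the
    // branch guarding it) could introduce a dynamic use-after-free.
    return !mayObjectBeDeallocatedInScope(Ptr, CtxI->getFunction());
  }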