Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -460,7 +460,8 @@ /// for such instructions, moving them may change the resulting value. bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. Index: llvm/lib/Analysis/Loads.cpp =================================================================== --- llvm/lib/Analysis/Loads.cpp +++ llvm/lib/Analysis/Loads.cpp @@ -133,8 +133,7 @@ Opts.RoundToAlign = false; Opts.NullIsUnknownSize = true; uint64_t ObjSize; - // TODO: Plumb through TLI so that malloc routines and such working. - if (getObjectSize(V, ObjSize, DL, nullptr, Opts)) { + if (getObjectSize(V, ObjSize, DL, TLI, Opts)) { APInt KnownDerefBytes(Size.getBitWidth(), ObjSize); if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) && Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -4359,7 +4359,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, - const DominatorTree *DT) { + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { const Operator *Inst = dyn_cast(V); if (!Inst) return false; @@ -4406,7 +4407,7 @@ const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer( LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), - DL, CtxI, DT); + DL, CtxI, DT, TLI); } case Instruction::Call: { auto *CI = cast(Inst); Index: llvm/lib/Transforms/Scalar/LICM.cpp 
=================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -162,6 +162,7 @@ OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, + const TargetLibraryInfo *TLI, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, @@ -885,7 +886,7 @@ ORE) && worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && isSafeToExecuteUnconditionally( - I, DT, CurLoop, SafetyInfo, ORE, + I, DT, TLI, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); @@ -1780,11 +1781,12 @@ /// or if it is a trapping instruction and is guaranteed to execute. static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, + const TargetLibraryInfo *TLI, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI) { - if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) + if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) return true; bool GuaranteedToExecute = @@ -2056,8 +2058,9 @@ // to execute does as well. Thus we can increase our guaranteed // alignment as well. 
if (!DereferenceableInPH || (InstAlignment > Alignment)) - if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo, - ORE, Preheader->getTerminator())) { + if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, + SafetyInfo, ORE, + Preheader->getTerminator())) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); } @@ -2104,7 +2107,7 @@ if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getValueOperand()->getType(), - Store->getAlign(), MDL, Preheader->getTerminator(), DT); + Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI); } } else return false; // Not a load or store. Index: llvm/test/Transforms/LICM/hoist-alloc.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/hoist-alloc.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -basic-aa -licm < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @unknown() +declare void @init(i8* nocapture) +declare void @use(i8) + +define i8 @test_sink_alloca() { +; CHECK-LABEL: @test_sink_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[A_RAW:%.*]] = bitcast [32 x i8]* [[A]] to i8* +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[ADDR_LE:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES_LE:%.*]] = load i8, i8* [[ADDR_LE]], 
align 1 +; CHECK-NEXT: ret i8 [[RES_LE]] +; +entry: + %a = alloca [32 x i8] + %a.raw = bitcast [32 x i8]* %a to i8* + call void @init(i8* %a.raw) + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i8 %res +} + +define i8 @test_hoist_alloca() { +; CHECK-LABEL: @test_hoist_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[A_RAW:%.*]] = bitcast [32 x i8]* [[A]] to i8* +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @use(i8 [[RES]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i8 [[RES_LCSSA]] +; +entry: + %a = alloca [32 x i8] + %a.raw = bitcast [32 x i8]* %a to i8* + call void @init(i8* %a.raw) + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + call void @use(i8 %res) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i8 %res +} + +; The attributes listed here are a) inferred by -O3 from the names +; 
and b) required for a standalone test. We're very inconsistent about +; which decisions we drive from TLI vs assume attributes have been inferred. +declare void @free(i8* nocapture) +declare noalias i8* @malloc(i64) + +define i8 @test_sink_malloc() { +; CHECK-LABEL: @test_sink_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[ADDR_LE:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES_LE:%.*]] = load i8, i8* [[ADDR_LE]], align 1 +; CHECK-NEXT: call void @free(i8* [[A_RAW]]) +; CHECK-NEXT: ret i8 [[RES_LE]] +; +entry: + ; Mark as nonnull to simplify test + %a.raw = call nonnull i8* @malloc(i64 32) + call void @init(i8* %a.raw) + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + call void @free(i8* %a.raw) + ret i8 %res +} + +define i8 @test_hoist_malloc() { +; CHECK-LABEL: @test_hoist_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], 
[[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @use(i8 [[RES]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], [[FOR_BODY]] ] +; CHECK-NEXT: call void @free(i8* [[A_RAW]]) +; CHECK-NEXT: ret i8 [[RES_LCSSA]] +; +entry: + %a.raw = call nonnull i8* @malloc(i64 32) + call void @init(i8* %a.raw) + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + call void @use(i8 %res) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + call void @free(i8* %a.raw) + ret i8 %res +} + +define i8 @test_hoist_malloc_leak() { +; CHECK-LABEL: @test_hoist_malloc_leak( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @use(i8 [[RES]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i8 [[RES_LCSSA]] +; +entry: + %a.raw = call nonnull i8* @malloc(i64 32) + call void @init(i8* %a.raw) + br label %for.body + +for.body: + %iv = phi 
i64 [ %iv.next, %for.body ], [ 0, %entry ] + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + call void @use(i8 %res) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i8 %res +} + +; In this case, we can't hoist the load out of the loop as the memory it +; accesses may have been conditionally freed in a manner correlated with +; whether the load is reached in the loop. FIXME: the CHECKs below show the load being hoisted into the preheader anyway; this is a miscompile. +define void @test_hoist_malloc_cond_free(i1 %c) { +; CHECK-LABEL: @test_hoist_malloc_cond_free( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) +; CHECK-NEXT: call void @init(i8* [[A_RAW]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[COND_FREE:%.*]], label [[PREHEADER:%.*]] +; CHECK: cond.free: +; CHECK-NEXT: call void @free(i8* [[A_RAW]]) +; CHECK-NEXT: br label [[PREHEADER]] +; CHECK: preheader: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PREHEADER]] ] +; CHECK-NEXT: br i1 [[C]], label [[FOR_END:%.*]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @use(i8 [[RES]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %a.raw = call nonnull i8* @malloc(i64 32) + call void @init(i8* %a.raw) + br i1 %c, label %cond.free, label %preheader +cond.free: + call void @free(i8* %a.raw) + br label %preheader +preheader: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %preheader ] + br i1 %c, label %for.end, label 
%loop.latch + +loop.latch: + call void @unknown() ;; may throw + %addr = getelementptr i8, i8* %a.raw, i32 31 + %res = load i8, i8* %addr + call void @use(i8 %res) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} +