Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -999,14 +999,35 @@ const DataLayout &MDL = Preheader->getModule()->getDataLayout(); + // This really is a cache for whether the pointer is captured or not. + // PointerMayBeCaptured can be expensive, so it's worth caching it. + Optional<bool> AllocFnCaptureStatus = None; if (SafetyInfo->MayThrow) { // If a loop can throw, we have to insert a store along each unwind edge. // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. // - // Currently, this code just special-cases alloca instructions. - if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL))) + // If the underlying object is neither an alloca nor a pointer that does not + // escape, then we cannot effectively prove that the store is dead along + // the unwind edge, i.e. the caller of this function could have ways to + // access the pointed-to object. + // + // NOTE: PointerMayBeCaptured is not enough as the pointer may have escaped + // even though it's not captured by the enclosing function. Standard allocation + // functions like malloc, calloc, and operator new return values which can + // be assumed not to have previously escaped. + Value *Object = GetUnderlyingObject(SomePtr, MDL); + // If this is some base pointer we do not understand, simply bail. + if (!isa<AllocaInst>(Object) && !isAllocLikeFn(Object, TLI)) return false; + // This is a pointer from an alloc-like fn, so there are extra constraints we + // have to verify. More specifically, we must make sure that the pointer + // cannot be captured. + if (isAllocLikeFn(Object, TLI)) { + if (PointerMayBeCaptured(Object, true, true)) + return false; + AllocFnCaptureStatus = false; + } } // Check that all of the pointers in the alias set have the same type. We @@ -1110,8 +1131,13 @@ // memory model. 
if (!SafeToInsertStore) { Value *Object = GetUnderlyingObject(SomePtr, MDL); - SafeToInsertStore = - (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && + // We have computed the capture status of the object returned by the + // alloc-like calls. + if (isAllocLikeFn(Object, TLI) && AllocFnCaptureStatus.hasValue()) + SafeToInsertStore = !*AllocFnCaptureStatus; + else + SafeToInsertStore = + (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && !PointerMayBeCaptured(Object, true, true); } Index: test/Transforms/LICM/scalar-promote-unwind.ll =================================================================== --- test/Transforms/LICM/scalar-promote-unwind.ll +++ test/Transforms/LICM/scalar-promote-unwind.ll @@ -137,14 +137,125 @@ resume { i8*, i32 } %lpad.val3 } -declare void @boo() -declare i32 @__gxx_personality_v0(...) +; The malloc'ed memory is not captured and therefore promoted. +define void @malloc_no_capture() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %call = call i8* @malloc(i64 4) + %0 = bitcast i8* %call to i32* + br label %for.body -declare i32 @llvm.eh.typeid.for(i8*) +; CHECK: for.body: +; CHECK-NOT: load +; CHECK-NOT: store +; CHECK: br +for.body: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ] + %1 = load i32, i32* %0, align 4 + %add = add nsw i32 %1, 1 + store i32 %add, i32* %0, align 4 + br label %for.call + +for.call: + invoke void @boo() + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %for.latch + +for.latch: + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %fun.ret + +lpad: + %2 = landingpad { i8*, i32 } + catch i8* null + %3 = extractvalue { i8*, i32 } %2, 0 + %4 = extractvalue { i8*, i32 } %2, 1 + br label %catch + +catch: + %5 = call i8* @__cxa_begin_catch(i8* %3) #4 + %6 = bitcast i32* %0 to i8* + call void @free(i8* %6) + call void @__cxa_end_catch() + br label %fun.ret + +fun.ret: + ret void +} + +; The malloc'ed 
memory can be captured and therefore not promoted. +define void @malloc_capture() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %call = call i8* @malloc(i64 4) + %0 = bitcast i8* %call to i32* + br label %for.body + +; CHECK: for.body: +; CHECK: load +; CHECK: store +; CHECK: br +for.body: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.latch ] + %1 = load i32, i32* %0, align 4 + %add = add nsw i32 %1, 1 + store i32 %add, i32* %0, align 4 + br label %for.call + +for.call: + invoke void @boo_with_arg(i32* %0) + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %for.latch + +for.latch: + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %fun.ret + +lpad: + %2 = landingpad { i8*, i32 } + catch i8* null + %3 = extractvalue { i8*, i32 } %2, 0 + %4 = extractvalue { i8*, i32 } %2, 1 + br label %catch + +catch: + %5 = call i8* @__cxa_begin_catch(i8* %3) #4 + %6 = bitcast i32* %0 to i8* + call void @free(i8* %6) + call void @__cxa_end_catch() + br label %fun.ret + +fun.ret: + ret void +} + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) + +; Function Attrs: nounwind +declare void @free(i8* nocapture) + +declare void @boo() + +declare void @boo_with_arg(i32 *) + +declare i32 @__gxx_personality_v0(...) declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() +declare i32 @llvm.eh.typeid.for(i8*) + declare void @f() uwtable