Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -1034,7 +1034,8 @@ if (!SafeToInsertStore) { Value *Object = GetUnderlyingObject(SomePtr, MDL); SafeToInsertStore = - isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true); + (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && + !PointerMayBeCaptured(Object, true, true); } // If we've still failed to prove we can sink the store, give up. Index: llvm/trunk/test/Transforms/LICM/promote-tls.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/promote-tls.ll +++ llvm/trunk/test/Transforms/LICM/promote-tls.ll @@ -12,6 +12,7 @@ declare i8* @malloc(i64) ; Exercise the TLS case +; CHECK-LABEL: @test define i32* @test(i32 %n) { entry: ;; ignore the required null check for simplicity @@ -49,9 +50,48 @@ ret i32* null } +; Stack allocations can also be thread-local +; CHECK-LABEL: @test2 +define i32* @test2(i32 %n) { +entry: + %mem = alloca i8, i32 16 + %addr = bitcast i8* %mem to i32* + br label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.header + +for.header: + %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %old = load i32, i32* %addr, align 4 + ; deliberate impossible to analyze branch + %guard = load atomic i8*, i8** @p monotonic, align 8 + %exitcmp = icmp eq i8* %guard, null + br i1 %exitcmp, label %for.body, label %early-exit + +early-exit: +; CHECK-LABEL: early-exit: +; CHECK: store i32 %new1.lcssa, i32* %addr, align 1 + ret i32* null + +for.body: + %new = add i32 %old, 1 + store i32 %new, i32* %addr, align 4 + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK: store i32 
%new.lcssa, i32* %addr, align 1 + %split = phi i32* [ %addr, %for.body ] + ret i32* null +} + declare i8* @not_malloc(i64) ; Negative test - not TLS +; CHECK-LABEL: @test_neg define i32* @test_neg(i32 %n) { entry: ;; ignore the required null check for simplicity @@ -93,6 +133,7 @@ ; Negative test - can't speculate load since branch ; may control alignment +; CHECK-LABEL: @test_neg2 define i32* @test_neg2(i32 %n) { entry: ;; ignore the required null check for simplicity @@ -131,4 +172,3 @@ %split = phi i32* [ %addr, %for.body ] ret i32* null } -