Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/GuardUtils.h"
@@ -2250,6 +2251,19 @@
         return SI->getValueOperand();
       return nullptr; // Unknown store.
     }
+
+    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
+      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
+          LI->isSimple()) {
+        // "Stack" memory is always writable, so we cannot trap. If the address
+        // does not escape, we cannot introduce a data race either.
+        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
+        if (AI && !PointerMayBeCaptured(AI, false, true))
+          // Found a previous load, return it.
+          return LI;
+      }
+      // The load didn't work out, but we may still find a store.
+    }
   }
 
   return nullptr;
Index: llvm/test/Transforms/SimplifyCFG/speculate-store.ll
===================================================================
--- llvm/test/Transforms/SimplifyCFG/speculate-store.ll
+++ llvm/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -175,6 +175,47 @@
   ret void
 }
 
+;; Speculate a store, preceded by a local, non-escaping load
+define i32 @f(i64 %i, i32 %b) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[A]] to i64*
+; CHECK-NEXT:    store i64 4294967296, i64* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %a = alloca [2 x i32], align 8
+  %0 = bitcast [2 x i32]* %a to i64*
+  store i64 4294967296, i64* %0, align 8
+  %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 %i
+  %1 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp slt i32 %1, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %b, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  %arrayidx1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  %2 = load i32, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  %3 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %2, %3
+  ret i32 %add
+}
+
 ; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
 !0 = !{!"branch_weights", i32 3, i32 5}
 