Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2188,6 +2188,68 @@
   return Changed;
 }
 
+///
+/// Check whether the address `AI` of a local variable may escape from the
+/// function
+///
+static bool isEscaping(const AllocaInst *AI) {
+  // Track visited PHIs so cyclic use chains terminate; other forwarded
+  // instructions (GEP/casts/select) cannot form cycles.
+  SmallPtrSet<const PHINode *, 8> Visited;
+  SmallVector<const Value *, 8> Worklist;
+
+  Worklist.push_back(AI);
+  while (!Worklist.empty()) {
+    const Value *V = Worklist.pop_back_val();
+    for (const User *U : V->users()) {
+      const Instruction *I = dyn_cast<Instruction>(U);
+      if (I == nullptr)
+        continue;
+      switch (I->getOpcode()) {
+      default:
+        // If not handled in one of the other cases, conservatively assume the
+        // value escapes.
+        return true;
+      case Instruction::Load:
+      case Instruction::Ret:
+      case Instruction::AtomicRMW:
+        // A pointer cannot escape via these instructions.
+        break;
+      case Instruction::Call: {
+        const auto *CI = cast<CallInst>(I);
+        // Debug intrinsics and lifetime markers do not cause an escape.
+        if (CI->isDebugOrPseudoInst() || CI->isLifetimeStartOrEnd())
+          break;
+      }
+        LLVM_FALLTHROUGH;
+      case Instruction::Invoke:
+      case Instruction::CallBr:
+        // Passing the pointer as a data argument lets the callee capture it.
+        if (any_of(cast<CallBase>(I)->data_ops(),
+                   [=](const Use &Arg) { return Arg.get() == V; }))
+          return true;
+        break;
+      case Instruction::Store:
+        // Storing the pointer itself (not storing *to* it) escapes it.
+        if (V == cast<StoreInst>(I)->getValueOperand())
+          return true;
+        break;
+      case Instruction::AtomicCmpXchg:
+        if (V == cast<AtomicCmpXchgInst>(I)->getNewValOperand())
+          return true;
+        break;
+      case Instruction::PHI:
+        if (!Visited.insert(cast<PHINode>(I)).second)
+          break;
+        LLVM_FALLTHROUGH;
+      case Instruction::GetElementPtr:
+      case Instruction::BitCast:
+      case Instruction::AddrSpaceCast:
+      case Instruction::Select:
+        // These forward the pointer; keep following their users.
+        Worklist.push_back(I);
+        break;
+      }
+    }
+  }
+
+  return false;
+}
+
 /// Determine if we can hoist sink a sole store instruction out of a
 /// conditional block.
 ///
@@ -2250,6 +2312,19 @@
         return SI->getValueOperand();
       return nullptr; // Unknown store.
     }
+
+    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
+      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
+          LI->isSimple()) {
+        // "Stack" memory is always writable, so we cannot trap. If the address
+        // does not escape, we cannot introduce a data race either.
+        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
+        if (AI && !isEscaping(AI))
+          // Found a previous load, return it.
+          return LI;
+      }
+      // The load didn't work out, but we may still find a store.
+    }
   }
 
   return nullptr;
Index: llvm/test/Transforms/SimplifyCFG/speculate-store.ll
===================================================================
--- llvm/test/Transforms/SimplifyCFG/speculate-store.ll
+++ llvm/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -175,6 +175,47 @@
   ret void
 }
 
+;; Speculate a store, preceded by a local, non-escaping load
+define i32 @f(i64 %i, i32 %b) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[A]] to i64*
+; CHECK-NEXT:    store i64 4294967296, i64* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %a = alloca [2 x i32], align 8
+  %0 = bitcast [2 x i32]* %a to i64*
+  store i64 4294967296, i64* %0, align 8
+  %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 %i
+  %1 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp slt i32 %1, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %b, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  %arrayidx1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  %2 = load i32, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  %3 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %2, %3
+  ret i32 %add
+}
+
 ; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
 !0 = !{!"branch_weights", i32 3, i32 5}