diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -621,6 +621,8 @@
 to be placed in and may have an optional explicit alignment specified. If there
 is a mismatch between the explicit or inferred section information for the
 variable declaration and its definition the resulting behavior is undefined.
+Global variables with explicit sections will not be constant propagated between
+functions with differing explicit sections.
 
 A variable may be defined as a global ``constant``, which indicates that the
 contents of the variable will **never** be modified (enabling better
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -176,6 +176,7 @@
   LLVMContext &Ctx;
 
 public:
+  SmallPtrSet<Constant *, 8> NonReplaceable;
   void addAnalysis(Function &F, AnalysisResultsForFn A) {
     AnalysisResults.insert({&F, std::move(A)});
   }
@@ -354,6 +355,18 @@
     return nullptr;
   }
 
+  bool isGVSection(Constant *C) const {
+    if (auto *GV = dyn_cast<GlobalVariable>(C))
+      if (GV->hasSection())
+        return NonReplaceable.count(C);
+    if (auto *CE = dyn_cast<ConstantExpr>(C))
+      for (Use &Op : CE->operands())
+        if (auto *COp = dyn_cast<Constant>(Op))
+          if (isGVSection(COp))
+            return true;
+    return false;
+  }
+
 private:
   ConstantInt *getConstantInt(const ValueLatticeElement &IV) const {
     return dyn_cast_or_null<ConstantInt>(getConstant(IV));
@@ -1236,6 +1249,12 @@
 void SCCPSolver::handleCallArguments(CallBase &CB) {
   Function *F = CB.getCalledFunction();
+  bool SectionMismatch = false;
+
+  if (!isa<IntrinsicInst>(CB) && F)
+    if (F->getSection() != CB.getParent()->getParent()->getSection())
+      SectionMismatch = true;
+
   // If this is a local function that doesn't have its address taken, mark its
   // entry block executable and merge in the actual arguments to the call into
   // the formal arguments of the function.
@@ -1256,12 +1275,18 @@
       if (auto *STy = dyn_cast<StructType>(AI->getType())) {
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          ValueLatticeElement CallArg = getStructValueState(*CAI, i);
+          ValueLatticeElement &CallArg = getStructValueState(*CAI, i);
+          if (SectionMismatch && CallArg.isConstant())
+            NonReplaceable.insert(CallArg.getConstant());
           mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
                        getMaxWidenStepsOpts());
         }
-      } else
-        mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts());
+      } else {
+        ValueLatticeElement &CallArg = getValueState(*CAI);
+        if (SectionMismatch && CallArg.isConstant())
+          NonReplaceable.insert(CallArg.getConstant());
+        mergeInValue(&*AI, CallArg, getMaxWidenStepsOpts());
+      }
     }
   }
 }
@@ -1658,6 +1683,9 @@
     return false;
   }
 
+  if (Solver.isGVSection(Const))
+    return false;
+
   LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');
 
   // Replaces all of the uses of a variable with uses of the constant.
diff --git a/llvm/test/Transforms/SCCP/ipsccp-function-sections.ll b/llvm/test/Transforms/SCCP/ipsccp-function-sections.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/ipsccp-function-sections.ll
@@ -0,0 +1,142 @@
+; RUN: opt -passes=ipsccp -S -o - %s | FileCheck %s
+; load test
+@foo = dso_local global i64 zeroinitializer, section ".init.data", align 8
+@bar = dso_local global i64 zeroinitializer, align 8
+
+define internal void @quux(i64* %x, i64* %y) {
+; The load of %y should be replaced with a load of @bar, because @bar is not in
+; any explicit section.
+;
+; The load of %x should *not* be replaced with a load of @foo, because @foo is
+; in an explicit section, and the explicit sections of the caller and callee do
+; not match.
+; CHECK-LABEL: @quux( +; CHECK-NEXT: %z = load i64, i64* %x, align 4 +; CHECK-NEXT: %w = load i64, i64* @bar, align 4 + + %z = load i64, i64* %x + %w = load i64, i64* %y + ret void +} + +define dso_local void @baz() section ".init.text" { + call void @quux(i64* @foo, i64* @bar) + ret void +} + +; store test +@g1 = dso_local global i64 zeroinitializer, section ".init.data", align 8 +@g2 = dso_local global i64 zeroinitializer, align 8 + +define internal void @g3(i64* %x, i64* %y) { +; The store of %y should be replaced with a store of @g2, because @g2 is not in +; any explicit section. +; +; The store of %x should *not* be replaced with a store of @g1, because @g1 is +; in an explicit section, and the explicit sections of the caller and callee do +; not match. +; CHECK-LABEL: @g3( +; CHECK-NEXT: store i64 42, i64* %x, align 4 +; CHECK-NEXT: store i64 42, i64* @g2, align 4 + store i64 42, i64* %x + store i64 42, i64* %y + ret void +} +define dso_local void @g4() section ".init.text" { + call void @g3(i64* @g1, i64* @g2) + ret void +} + +; GEP test +%struct.e = type { i64 } +@numa_nodes_parsed = dso_local global %struct.e zeroinitializer, section ".init.data", align 8 +@numa_nodes_parsed2 = dso_local global %struct.e zeroinitializer, align 8 + +define internal i64 @__nodes_weight(%struct.e* %g, %struct.e* %h) section ".init.text" { +; The GEP of %g should not be replaced with a GEP of @numa_nodes_parsed. 
+; CHECK-LABEL: @__nodes_weight( +; CHECK-NEXT: %bits = getelementptr inbounds %struct.e, %struct.e* %g, i32 0, i32 0 +; CHECK-NEXT: %x = load i64, i64* %bits, align 8 +; CHECK-NEXT: %x2 = load i64, i64* getelementptr inbounds (%struct.e, %struct.e* @numa_nodes_parsed2, i32 0, i32 0), align 8 + + %bits = getelementptr inbounds %struct.e, %struct.e* %g, i32 0, i32 0 + %x = load i64, i64* %bits, align 8 + %bits2 = getelementptr inbounds %struct.e, %struct.e* %h, i32 0, i32 0 + %x2 = load i64, i64* %bits2, align 8 + ret i64 %x +} + +define dso_local void @amd_numa_init() { +; CHECK-LABEL: @amd_numa_init( +; CHECK-NEXT: %call6 = call i64 @__nodes_weight(%struct.e* @numa_nodes_parsed, %struct.e* @numa_nodes_parsed2) + + %call6 = call i64 @__nodes_weight(%struct.e* @numa_nodes_parsed, %struct.e* @numa_nodes_parsed2) + ret void +} + +; nested GEP +declare dso_local void @some_func(i8*) + +define internal void @test_bit(i64* %addr) { +; CHECK-LABEL: @test_bit( +; CHECK-NEXT: %add.ptr = getelementptr i64, i64* %addr, i64 1 +; CHECK-NEXT: %foo = bitcast i64* %add.ptr to i8* +; CHECK-NEXT: call void @some_func(i8* %foo) + + %add.ptr = getelementptr i64, i64* %addr, i64 1 + %foo = bitcast i64* %add.ptr to i8* + call void @some_func(i8* %foo) + ret void +} +define dso_local void @caller2() section ".init.text" { + call void @test_bit(i64* getelementptr inbounds (%struct.e, %struct.e* @numa_nodes_parsed, i32 0, i32 0)) + ret void +} + +; Multiple callers +define internal void @test5_callee(i64* %x, i64* %y) { +; CHECK-LABEL: @test5_callee( +; CHECK-NEXT: %z = load i64, i64* %x, align 4 +; CHECK-NEXT: %w = load i64, i64* @bar, align 4 + %z = load i64, i64* %x + %w = load i64, i64* %y + ret void +} + +define dso_local void @test5_caller1(i64* %z) section ".init.text" { + %c = icmp eq i64* %z, @foo + br i1 %c, label %bb.1, label %bb.2 + +bb.1: + call void @test5_callee(i64* @foo, i64* @bar) + ret void + +bb.2: + ret void +} + +define dso_local void @test5_caller2() section 
".init.text" { + call void @test5_callee(i64* @foo, i64* @bar) + ret void +} + +; Test return value propagation. While we should not sink @foo in +; @test6_callee, we should be able to propagate the constant true back out into +; @test6_caller. +define internal i1 @test6_callee(i64* %x, i64* %y) { +; We don't really care about the callee, so long as it does not contain a +; reference to @foo. +; CHECK-LABEL: @test6_callee( +; CHECK-NEXT: ret i1 undef + %c = icmp eq i64* %x, %y + ret i1 %c +} + +define dso_local i1 @test6_caller() section ".init.text" { +; Check that we're able to replace the return with a constant true value. +; CHECK-LABEL: @test6_caller( +; CHECK-NEXT: %c = call i1 @test6_callee(i64* @foo, i64* @foo) +; CHECK-NEXT: ret i1 true + %c = call i1 @test6_callee(i64* @foo, i64* @foo) + ret i1 %c +}