Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -3670,7 +3670,49 @@
 
     // The called function could have undefined behavior or side-effects, even
     // if marked readnone nounwind.
-    return Callee && Callee->isSpeculatable();
+    if (Callee && Callee->isSpeculatable()) {
+      const DataLayout &DL = CI->getModule()->getDataLayout();
+      auto CS = ImmutableCallSite(CI);
+
+      // Function arguments can have constraints, such as nonnull, the violation
+      // of which yields undefined behavior (not just a poison value). Functions
+      // with such attributed arguments cannot be speculated unless we can
+      // validate the conditions at the point of speculation.
+      for (auto &A : CS.args()) {
+        unsigned AIdx = A.getOperandNo();
+        if (CS.paramHasAttr(AIdx, Attribute::NonNull) &&
+            // FIXME: Pass AC here.
+            !isKnownNonZero(&*A, DL, 0, nullptr, CtxI, DT))
+          return false;
+
+        // The dereferenceable(_or_null) getters take an attribute-list index,
+        // not an argument number.
+        unsigned AttrIdx = AIdx + AttributeList::FirstArgIndex;
+
+        if (uint64_t B = CS.getDereferenceableBytes(AttrIdx)) {
+          bool CanBeNull;
+          if (B > A->getPointerDereferenceableBytes(DL, CanBeNull) ||
+              CanBeNull)
+            return false;
+        }
+
+        if (uint64_t B = CS.getDereferenceableOrNullBytes(AttrIdx)) {
+          bool CanBeNull;
+          if (B > A->getPointerDereferenceableBytes(DL, CanBeNull))
+            return false;
+        }
+
+        if (!CS.isByValOrInAllocaArgument(AIdx)) {
+          unsigned Align = CS.getParamAlignment(AIdx);
+          if (Align > 1 && Align > A->getPointerAlignment(DL))
+            return false;
+        }
+      }
+
+      return true;
+    }
+
+    return false;
   }
   case Instruction::VAArg:
   case Instruction::Alloca:
Index: lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- lib/Transforms/IPO/FunctionAttrs.cpp
+++ lib/Transforms/IPO/FunctionAttrs.cpp
@@ -74,6 +74,7 @@
 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
+STATISTIC(NumSpeculatable, "Number of functions marked as speculatable");
 
 // FIXME: This is disabled by default to avoid exposing security vulnerabilities
 // in C/C++ code compiled by clang:
@@ -87,6 +88,10 @@
     "disable-nounwind-inference", cl::Hidden,
     cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
 
+static cl::opt<bool> DisableSpeculatableInference(
+    "disable-speculatable-inference", cl::Hidden,
+    cl::desc("Stop inferring speculatable attribute during function-attrs pass"));
+
 namespace {
 
 using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -1193,6 +1198,102 @@
   return true;
 }
 
+/// Helper for Speculatable inference predicate InstrBreaksAttribute.
+static bool InstrBreaksSpeculatable(Instruction &I) {
+  // Note that we don't bother to check here for calls to functions within the
+  // current SCC for two reasons:
+  //  1. It wouldn't clearly be useful. As we don't allow functions with
+  //     branches any call would be called unconditionally, and we shouldn't
+  //     speculate infinite recursion.
+  //  2. We'd need to avoid triggering UB by passing poison values, or
+  //     otherwise invalid values, to function arguments with constraints (e.g.,
+  //     nonnull).
+
+  // If instructions have metadata that can't be speculated, then a call to the
+  // function can't be speculated. Generally, if we speculate an instruction
+  // with metadata we can just drop the metadata. However, if we speculate a
+  // call, then we can't drop the metadata inside the called function.
+  if (I.hasMetadataOtherThanDebugLoc())
+    return true;
+
+  const DataLayout &DL = I.getModule()->getDataLayout();
+
+  // If a load has an alignment requirement, we need to make sure that we can
+  // justify this constraint using only function-argument attributes or global
+  // information (otherwise, this requirement might be violated when the call
+  // is executed speculatively).
+  if (auto *LI = dyn_cast<LoadInst>(&I)) {
+    unsigned Align = LI->getAlignment();
+    if (!Align)
+      Align = DL.getABITypeAlignment(LI->getType());
+    if (Align > LI->getPointerOperand()->getPointerAlignment(DL))
+      return true;
+  }
+
+  // Note that we need to assume here that any inputs to the function might be
+  // poison, and in response, we can generate more poison, but not undefined
+  // behavior. This means no branching (and essentially no stores).
+
+  // Returns are okay in this context.
+  if (isa<ReturnInst>(I))
+    return false;
+
+  return !isSafeToSpeculativelyExecute(&I);
+}
+
+static bool FuncHasNonSpecRetAttrs(const Function &F) {
+  // If the function has return-value attributes that impose value constraints
+  // then we cannot speculate the execution of the function (as the violation
+  // of those constraints is UB, and they might not be true when the execution
+  // is speculative). This is true unless we can independently prove the
+  // constraint (this should be safe because we later disqualify functions that
+  // have local metadata that might prove the constraint but can't be
+  // speculated).
+
+  // Collect all return instructions.
+  SmallVector<const ReturnInst *, 4> RetInsts;
+  for (auto &BB : F)
+    for (auto &I : BB)
+      if (auto *RI = dyn_cast<ReturnInst>(&I))
+        if (RI->getNumOperands() != 0)
+          RetInsts.push_back(RI);
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  if (F.returnDoesNotAlias())
+    return true;
+
+  if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                     Attribute::NonNull))
+    for (auto *RI : RetInsts)
+      if (!isKnownNonZero(RI->getReturnValue(), DL))
+        return true;
+
+  if (uint64_t B = F.getDereferenceableBytes(AttributeList::ReturnIndex))
+    for (auto *RI : RetInsts) {
+      bool CanBeNull;
+      if (B > RI->getReturnValue()->
+                getPointerDereferenceableBytes(DL, CanBeNull) || CanBeNull)
+        return true;
+    }
+
+  if (uint64_t B = F.getDereferenceableOrNullBytes(AttributeList::ReturnIndex))
+    for (auto *RI : RetInsts) {
+      bool CanBeNull;
+      if (B > RI->getReturnValue()->
+                getPointerDereferenceableBytes(DL, CanBeNull))
+        return true;
+    }
+
+  unsigned Align = F.getAttributes().getRetAlignment();
+  if (Align > 1)
+    for (auto *RI : RetInsts)
+      if (Align > RI->getReturnValue()->getPointerAlignment(DL))
+        return true;
+
+  return false;
+}
+
 /// Infer attributes from all functions in the SCC by scanning every
 /// instruction for compliance to the attribute assumptions. Currently it
 /// does:
@@ -1246,6 +1347,29 @@
       },
       /* RequiresExactDefinition= */ true});
 
+  if (!DisableSpeculatableInference)
+    // Request to infer speculatable attribute for all the functions in the SCC if
+    // every instruction within the SCC is safe to speculatively execute (calls to
+    // functions within the SCC are not exempted). Note that the speculatable
+    // attribute suffers from derefinement - results may change depending on how
+    // functions are optimized. Thus it can be inferred only from exact definitions.
+    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+        Attribute::Speculatable,
+        // Skip speculatable functions.
+        [](const Function &F) { return F.isSpeculatable() ||
+                                       FuncHasNonSpecRetAttrs(F); },
+        // Instructions that break speculatable assumption.
+        [](Instruction &I) {
+          return InstrBreaksSpeculatable(I);
+        },
+        [](Function &F) {
+          LLVM_DEBUG(dbgs()
+                     << "Adding speculatable attr to fn " << F.getName() << "\n");
+          F.setSpeculatable();
+          ++NumSpeculatable;
+        },
+        /* RequiresExactDefinition= */ true});
+
   // Perform all the requested attribute inference actions.
   return AI.run(SCCNodes);
 }
Index: test/CodeGen/AMDGPU/inline-attr.ll
===================================================================
--- test/CodeGen/AMDGPU/inline-attr.ll
+++ test/CodeGen/AMDGPU/inline-attr.ll
@@ -6,13 +6,13 @@
 ; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
-; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #0 = { norecurse nounwind readnone speculatable "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
 ; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
 
-; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" }
+; NOINFS: attributes #0 = { norecurse nounwind readnone speculatable "no-infs-fp-math"="true" }
 ; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
 
-; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" }
+; NONANS: attributes #0 = { norecurse nounwind readnone speculatable "no-nans-fp-math"="true" }
 ; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
 
 define float @foo(float %x) #0 {
Index: test/Transforms/FunctionAttrs/atomic.ll
===================================================================
--- test/Transforms/FunctionAttrs/atomic.ll
+++ test/Transforms/FunctionAttrs/atomic.ll
@@ -20,5 +20,59 @@
   ret i32 %r
 }
 
+define i32 @test3(i32* align 4 %x) uwtable ssp {
+; CHECK: define i32 @test3(i32* nocapture readonly align 4 %x) #2 {
+entry:
+  %r = load i32, i32* %x, align 4
+  ret i32 %r
+}
+
+define i32 @test4(i32* align 4 dereferenceable(4) %x) uwtable ssp {
+; CHECK: define i32 @test4(i32* nocapture readonly align 4 dereferenceable(4) %x) #3 {
+entry:
+  %r = load i32, i32* %x, align 4
+  ret i32 %r
+}
+
+define i32* @test5(i32** align 4 dereferenceable(4) %x) uwtable ssp {
+; CHECK: define nonnull i32* @test5(i32** nocapture readonly align 4 dereferenceable(4) %x) #2 {
+entry:
+  %r = load i32*, i32** %x, align 4, !nonnull !0
+  ret i32* %r
+}
+
+define i32 @test6(i32* align 4 dereferenceable(4) %x) uwtable ssp {
+; CHECK: define i32 @test6(i32* nocapture readonly align 4 dereferenceable(4) %x) #2 {
+entry:
+  %r = load i32, i32* %x, align 4, !range !1
+  ret i32 %r
+}
+
+define noalias i32* @test7(i32** align 4 dereferenceable(4) %x) uwtable ssp {
+; CHECK: define noalias i32* @test7(i32** nocapture readonly align 4 dereferenceable(4) %x) #2 {
+entry:
+  %r = load i32*, i32** %x, align 4
+  ret i32* %r
+}
+
+define align 4 i32* @test8(i32* align 4 %x) uwtable ssp {
+; CHECK: define align 4 i32* @test8(i32* readnone returned align 4 %x) #4 {
+entry:
+  ret i32* %x
+}
+
+define align 8 i32* @test9(i32* align 4 %x) uwtable ssp {
+; CHECK: define align 8 i32* @test9(i32* readnone returned align 4 %x) #0 {
+entry:
+  ret i32* %x
+}
+
+!0 = !{}
+!1 = !{ i32 0, i32 2000 }
+
 ; CHECK: attributes #0 = { norecurse nounwind readnone ssp uwtable }
 ; CHECK: attributes #1 = { norecurse nounwind ssp uwtable }
+; CHECK: attributes #2 = { norecurse nounwind readonly ssp uwtable }
+; CHECK: attributes #3 = { norecurse nounwind readonly speculatable ssp uwtable }
+; CHECK: attributes #4 = { norecurse nounwind readnone speculatable ssp uwtable }
+
Index: test/Transforms/FunctionAttrs/optnone.ll
===================================================================
--- test/Transforms/FunctionAttrs/optnone.ll
+++ test/Transforms/FunctionAttrs/optnone.ll
@@ -20,6 +20,6 @@
 ; CHECK: (i8*) #1
 
 ; CHECK-LABEL: attributes #0
-; CHECK: = { norecurse nounwind readnone }
+; CHECK: = { norecurse nounwind readnone speculatable }
 ; CHECK-LABEL: attributes #1
 ; CHECK: = { noinline optnone }
Index: test/Transforms/Reassociate/reassociate-deadinst.ll
===================================================================
--- test/Transforms/Reassociate/reassociate-deadinst.ll
+++ test/Transforms/Reassociate/reassociate-deadinst.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -inline -functionattrs -reassociate -S | FileCheck %s
+; RUN: opt < %s -inline -functionattrs -reassociate -disable-speculatable-inference -S | FileCheck %s
+; Note: We disable inference of the speculatable attribute here so that the
+; call to @func1 ends up in the ValueRankMap (requiring mayBeMemoryDependent to
+; return true) so that the call will be removed after its uses are removed.
 
 ; CHECK-NOT: func1
 ; CHECK-LABEL: main