diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -151,18 +151,25 @@ /// result will be based only on AA results for the function declaration; it /// will be assumed that some other (perhaps less optimized) version of the /// function may be selected at link time. -static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody, - AAResults &AAR, - const SCCNodeSet &SCCNodes) { +/// +/// The return value is split into two parts: Memory effects that always apply, +/// and additional memory effects that apply if any of the functions in the SCC +/// can access argmem. +static std::pair<MemoryEffects, MemoryEffects> +checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, + const SCCNodeSet &SCCNodes) { MemoryEffects OrigME = AAR.getMemoryEffects(&F); if (OrigME.doesNotAccessMemory()) // Already perfect! - return OrigME; + return {OrigME, MemoryEffects::none()}; if (!ThisBody) - return OrigME; + return {OrigME, MemoryEffects::none()}; MemoryEffects ME = MemoryEffects::none(); + // Additional locations accessed if the SCC accesses argmem. + MemoryEffects RecursiveArgME = MemoryEffects::none(); + // Inalloca and preallocated arguments are always clobbered by the call. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) @@ -173,13 +180,19 @@ // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. if (auto *Call = dyn_cast<CallBase>(&I)) { - // Ignore calls to functions in the same SCC, as long as the call sites - // don't have operand bundles. Calls with operand bundles are allowed to - // have memory effects not described by the memory effects of the call - // target. 
+ // We can optimistically ignore calls to functions in the same SCC, with + // two caveats: + // * Calls with operand bundles may have additional effects. + // * Argument memory accesses may imply additional effects depending on + // what the argument location is. if (!Call->hasOperandBundles() && Call->getCalledFunction() && - SCCNodes.count(Call->getCalledFunction())) + SCCNodes.count(Call->getCalledFunction())) { + // Keep track of which additional locations are accessed if the SCC + // turns out to access argmem. + addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR); continue; + } + MemoryEffects CallME = AAR.getMemoryEffects(Call); // If the call doesn't access memory, we're done. @@ -232,12 +245,12 @@ addLocAccess(ME, *Loc, MR, AAR); } - return OrigME & ME; + return {OrigME & ME, RecursiveArgME}; } MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR) { - return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}); + return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first; } /// Deduce readonly/readnone/writeonly attributes for the SCC. @@ -245,18 +258,27 @@ static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, SmallSet<Function *, 8> &Changed) { MemoryEffects ME = MemoryEffects::none(); + MemoryEffects RecursiveArgME = MemoryEffects::none(); for (Function *F : SCCNodes) { // Call the callable parameter to look up AA results for this function. AAResults &AAR = AARGetter(*F); // Non-exact function definitions may not be selected at link time, and an // alternative version that writes to memory may be selected. See the // comment on GlobalValue::isDefinitionExact for more details. - ME |= checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); + auto [FnME, FnRecursiveArgME] = + checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); + ME |= FnME; + RecursiveArgME |= FnRecursiveArgME; // Reached bottom of the lattice, we will not be able to improve the result. 
if (ME == MemoryEffects::unknown()) return; } + // If the SCC accesses argmem, add recursive accesses resulting from that. + ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); + if (ArgMR != ModRefInfo::NoModRef) + ME |= RecursiveArgME & MemoryEffects(ArgMR); + for (Function *F : SCCNodes) { MemoryEffects OldME = F->getMemoryEffects(); MemoryEffects NewME = ME & OldME; diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll --- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll @@ -334,7 +334,7 @@ ; FNATTRS: Function Attrs: nofree memory(inaccessiblemem: read) ; FNATTRS-LABEL: define void @test_inaccessiblememonly_readonly ; FNATTRS-SAME: () #[[ATTR13:[0-9]+]] { -; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR18:[0-9]+]] +; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR19:[0-9]+]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync memory(inaccessiblemem: read) @@ -352,7 +352,7 @@ ; FNATTRS-LABEL: define void @test_inaccessibleorargmemonly_readonly ; FNATTRS-SAME: (ptr nocapture readonly [[ARG:%.*]]) #[[ATTR14:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG]], align 4 -; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR18]] +; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR19]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync memory(argmem: read, inaccessiblemem: read) @@ -372,7 +372,7 @@ ; FNATTRS-LABEL: define void @test_inaccessibleorargmemonly_readwrite ; FNATTRS-SAME: (ptr nocapture writeonly [[ARG:%.*]]) #[[ATTR15:[0-9]+]] { ; FNATTRS-NEXT: store i32 0, ptr [[ARG]], align 4 -; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR18]] +; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR19]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync memory(argmem: readwrite, inaccessiblemem: readwrite) @@ -388,7 +388,7 @@ } define void 
@test_recursive_argmem_read(ptr %p) { -; FNATTRS: Function Attrs: nofree nosync nounwind memory(argmem: read) +; FNATTRS: Function Attrs: nofree nosync nounwind memory(read, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test_recursive_argmem_read ; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR16:[0-9]+]] { ; FNATTRS-NEXT: [[PVAL:%.*]] = load ptr, ptr [[P]], align 8 @@ -408,7 +408,7 @@ } define void @test_recursive_argmem_readwrite(ptr %p) { -; FNATTRS: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) +; FNATTRS: Function Attrs: nofree nosync nounwind memory(readwrite, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test_recursive_argmem_readwrite ; FNATTRS-SAME: (ptr nocapture [[P:%.*]]) #[[ATTR17:[0-9]+]] { ; FNATTRS-NEXT: [[PVAL:%.*]] = load ptr, ptr [[P]], align 8 @@ -433,7 +433,7 @@ define void @test_recursive_argmem_read_alloca(ptr %p) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(argmem: read) ; FNATTRS-LABEL: define void @test_recursive_argmem_read_alloca -; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR16]] { +; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR18:[0-9]+]] { ; FNATTRS-NEXT: [[A:%.*]] = alloca ptr, align 8 ; FNATTRS-NEXT: [[TMP1:%.*]] = load i32, ptr [[P]], align 4 ; FNATTRS-NEXT: call void @test_recursive_argmem_read_alloca(ptr [[A]]) @@ -454,7 +454,7 @@ } define void @test_scc_argmem_read_1(ptr %p) { -; FNATTRS: Function Attrs: nofree nosync nounwind memory(argmem: read) +; FNATTRS: Function Attrs: nofree nosync nounwind memory(read, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test_scc_argmem_read_1 ; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR16]] { ; FNATTRS-NEXT: [[PVAL:%.*]] = load ptr, ptr [[P]], align 8 @@ -474,7 +474,7 @@ } define void @test_scc_argmem_read_2(ptr %p) { -; FNATTRS: Function Attrs: nofree nosync nounwind memory(argmem: read) +; FNATTRS: Function Attrs: nofree nosync nounwind memory(read, inaccessiblemem: none) ; FNATTRS-LABEL: 
define void @test_scc_argmem_read_2 ; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR16]] { ; FNATTRS-NEXT: call void @test_scc_argmem_read_1(ptr [[P]])