Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1720,7 +1720,8 @@
 
 template <typename AARGetterT>
 static SmallSet<Function *, 8>
-deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
+deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
+                       bool ArgAttrsOnly) {
   SCCNodesResult Nodes = createSCCNodeSet(Functions);
 
   // Bail if the SCC only contains optnone functions.
@@ -1728,6 +1729,10 @@
     return {};
 
   SmallSet<Function *, 8> Changed;
+  if (ArgAttrsOnly) {
+    addArgumentAttrs(Nodes.SCCNodes, Changed);
+    return Changed;
+  }
 
   addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
   addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
@@ -1762,10 +1767,11 @@
                                                   LazyCallGraph &CG,
                                                   CGSCCUpdateResult &) {
-  // Skip non-recursive functions if requested.
+  // For non-recursive functions, only infer argument attributes if requested.
+  bool ArgAttrsOnly = false;
   if (C.size() == 1 && SkipNonRecursive) {
     LazyCallGraph::Node &N = *C.begin();
     if (!N->lookup(N))
-      return PreservedAnalyses::all();
+      ArgAttrsOnly = true;
   }
 
   FunctionAnalysisManager &FAM =
@@ -1782,7 +1788,8 @@
     Functions.push_back(&N.getFunction());
   }
 
-  auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
+  auto ChangedFunctions =
+      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
   if (ChangedFunctions.empty())
     return PreservedAnalyses::all();
 
Index: llvm/test/Transforms/InstCombine/unused-nonnull.ll
===================================================================
--- llvm/test/Transforms/InstCombine/unused-nonnull.ll
+++ llvm/test/Transforms/InstCombine/unused-nonnull.ll
@@ -9,7 +9,7 @@
 
 define i32 @main(i32 %argc, ptr %argv) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@main
-; CHECK-SAME: (i32 [[ARGC:%.*]], ptr nocapture readnone [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i32 [[ARGC:%.*]], ptr nocapture readonly [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32
[[ARGC]], 2
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP0]], i32 0, i32 [[ARGC]]
Index: llvm/test/Transforms/PhaseOrdering/arg-attrs-affect-earlycse.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PhaseOrdering/arg-attrs-affect-earlycse.ll
@@ -0,0 +1,175 @@
+; REQUIRES: asserts
+; RUN: opt < %s -O3 -disable-output -debug-only=early-cse 2>&1 | FileCheck %s
+
+; CHECK: EarlyCSE CSE LOAD
+
+%struct.foo = type { double, double }
+%struct.widget = type { i8 }
+
+$_Z15store_intrinsicI14rocfft_complexIdEEvPT_jjS2_b = comdat any
+
+@global.1 = external hidden addrspace(3) global [192 x %struct.foo], align 16
+@global.2 = external protected addrspace(4) externally_initialized constant %struct.widget, align 1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #0
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define protected amdgpu_kernel void @arg_attrs_affect_earlycse(ptr addrspace(1) noalias %arg, ptr addrspace(1) %arg1, ptr addrspace(1) noalias %arg2, ptr addrspace(1) noalias %arg3, ptr addrspace(1) noalias %arg4, i64 %arg5, i32 %arg6, ptr addrspace(1) noalias %arg7, ptr addrspace(1) noalias %arg8, i32 %arg9, ptr addrspace(1) noalias %arg10, ptr addrspace(1) noalias %arg11, ptr addrspace(1) noalias %arg12, ptr addrspace(1) noalias %arg13) #1 {
+bb:
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %alloca14 = alloca ptr, align 8, addrspace(5)
+  %alloca15 = alloca i32, align 4, addrspace(5)
+  %addrspacecast = addrspacecast ptr addrspace(5) %alloca to ptr
+  %addrspacecast16 = addrspacecast ptr addrspace(5) %alloca14 to ptr
+  %addrspacecast17 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast18 = addrspacecast ptr addrspace(5) %alloca15 to ptr
+  %addrspacecast19 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast20 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast21 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast22 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast23 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast24 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast25 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast26 = addrspacecast ptr addrspace(5) null to ptr
+  %addrspacecast27 = addrspacecast ptr addrspace(1) %arg2 to ptr
+  store ptr %addrspacecast27, ptr %addrspacecast, align 8
+  %load = load ptr, ptr %addrspacecast, align 8
+  store ptr %load, ptr %addrspacecast16, align 8
+  %add = add i64 0, 1
+  %mul = mul i64 %add, 8
+  %load28 = load ptr, ptr %addrspacecast16, align 8
+  %getelementptr = getelementptr inbounds i64, ptr %load28, i64 1
+  %load29 = load i64, ptr %getelementptr, align 8
+  %icmp = icmp ugt i64 %mul, %load29
+  %select = select i1 %icmp, i1 false, i1 true
+  %zext = zext i1 %select to i8
+  store i8 %zext, ptr %addrspacecast17, align 1
+  %call = call noundef i32 @foo.6(ptr nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(4) @global.2 to ptr)) #4
+  %udiv = udiv i32 %call, 8
+  store i32 %udiv, ptr %addrspacecast18, align 4
+  %call30 = call noundef i32 @foo.6(ptr nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(4) @global.2 to ptr)) #4
+  %call31 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr32 = getelementptr inbounds %struct.foo, ptr %addrspacecast19, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr32, align 8
+  %getelementptr33 = getelementptr inbounds %struct.foo, ptr %addrspacecast19, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr33, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast19, i64 16, i1 false)
+  call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) null) #5
+  %call34 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 poison, ptr align 8 null, i64 16, i1 false)
+  call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) null) #5
+  %call35 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr36 = getelementptr inbounds %struct.foo, ptr %addrspacecast20, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr36, align 8
+  %getelementptr37 = getelementptr inbounds %struct.foo, ptr %addrspacecast20, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr37, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast20, i64 16, i1 false)
+  %call38 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr39 = getelementptr inbounds %struct.foo, ptr %addrspacecast21, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr39, align 8
+  %getelementptr40 = getelementptr inbounds %struct.foo, ptr %addrspacecast21, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr40, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast21, i64 16, i1 false)
+  %call41 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr42 = getelementptr inbounds %struct.foo, ptr %addrspacecast22, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr42, align 8
+  %getelementptr43 = getelementptr inbounds %struct.foo, ptr %addrspacecast22, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr43, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast22, i64 16, i1 false)
+  %call44 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr45 = getelementptr inbounds %struct.foo, ptr %addrspacecast23, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr45, align 8
+  %getelementptr46 = getelementptr inbounds %struct.foo, ptr %addrspacecast23, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr46, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast23, i64 16, i1 false)
+  %call47 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %call48 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 poison, i64 16, i1 false)
+  %call49 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr50 = getelementptr inbounds %struct.foo, ptr %addrspacecast24, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr50, align 8
+  %getelementptr51 = getelementptr inbounds %struct.foo, ptr %addrspacecast24, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr51, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast24, i64 16, i1 false)
+  %call52 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr53 = getelementptr inbounds %struct.foo, ptr %addrspacecast25, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr53, align 8
+  %getelementptr54 = getelementptr inbounds %struct.foo, ptr %addrspacecast25, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr54, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast25, i64 16, i1 false)
+  %call55 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %getelementptr56 = getelementptr inbounds %struct.foo, ptr %addrspacecast26, i32 0, i32 0
+  store double 0.000000e+00, ptr %getelementptr56, align 8
+  %getelementptr57 = getelementptr inbounds %struct.foo, ptr %addrspacecast26, i32 0, i32 1
+  store double 0.000000e+00, ptr %getelementptr57, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 null, ptr align 8 %addrspacecast26, i64 16, i1 false)
+  %call58 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 poison, ptr align 8 null, i64 16, i1 false)
+  call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) null) #5
+  %load59 = load i32, ptr %addrspacecast18, align 4
+  %add60 = add i32 %load59, 14
+  %icmp61 = icmp ult i32 %add60, 24
+  br i1 %icmp61, label %bb62, label %bb65
+
+bb62:                                             ; preds = %bb
+  %load63 = load i8, ptr %addrspacecast17, align 1
+  %trunc = trunc i8 %load63 to i1
+  %select64 = select i1 %trunc, i1 true, i1 false
+  br label %bb65
+
+bb65:                                             ; preds = %bb62, %bb
+  %phi = phi i1 [ false, %bb ], [ %select64, %bb62 ]
+  %call66 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext %phi) #4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 poison, ptr align 8 null, i64 16, i1 false)
+  %load67 = load i32, ptr %addrspacecast18, align 4
+  %add68 = add i32 %load67, 14
+  %icmp69 = icmp ult i32 %add68, 24
+  br i1 %icmp69, label %bb70, label %bb72
+
+bb70:                                             ; preds = %bb65
+  %load71 = load i8, ptr %addrspacecast17, align 1
+  br label %bb72
+
+bb72:                                             ; preds = %bb70, %bb65
+  %call73 = call %struct.foo @baz.7(ptr poison, i32 poison, i32 poison, i1 zeroext poison) #4
+  %call74 = call noundef i32 @foo.6(ptr nonnull align 1 dereferenceable(1) addrspacecast (ptr addrspace(4) @global.2 to ptr)) #4
+  call void @barney.8(ptr null, ptr poison, ptr poison, ptr null, i32 poison, i32 poison, i32 poison, i1 zeroext true, ptr addrspacecast (ptr addrspace(3) @global.1 to ptr), i64 poison) #4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 16 null, i64 16, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 16 null, i64 16, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 16 null, i64 16, i1 false)
+  %load75 = load ptr, ptr %addrspacecast16, align 8
+  %getelementptr76 = getelementptr inbounds i64, ptr %load75, i64 1
+  %load77 = load i64, ptr %getelementptr76, align 8
+  %icmp78 = icmp ult i64 0, %load77
+  call void @snork(ptr poison, i32 poison, i32 poison, double poison, double poison, i1 zeroext %icmp78) #4
+  ret void
+}
+
+declare hidden i32 @foo.6(ptr) #2 align 2
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3
+
+declare hidden %struct.foo @baz.7(ptr, i32, i32, i1) #2
+
+declare hidden void @barney.8(ptr, ptr, ptr, ptr, i32, i32, i32, i1, ptr, i64) #2
+
+define weak_odr hidden void @snork(ptr %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, i1 %arg5) #2 comdat($_Z15store_intrinsicI14rocfft_complexIdEEvPT_jjS2_b) {
+bb:
+  ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #1 = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,112" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
+attributes #2 = { "target-features"="+16-bit-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+attributes #4 = { convergent nounwind }
+attributes #5 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 8, !"PIC Level", i32 1}