diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1643,6 +1643,9 @@ /// false, but it shouldn't matter what it returns anyway. bool hasArmWideBranch(bool Thumb) const; + /// \return The maximum number of function arguments the target supports. + unsigned getMaxNumArgs() const; + /// @} private: @@ -2003,6 +2006,7 @@ virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; + virtual unsigned getMaxNumArgs() const = 0; }; template <typename T> @@ -2694,6 +2698,10 @@ bool hasArmWideBranch(bool Thumb) const override { return Impl.hasArmWideBranch(Thumb); } + + unsigned getMaxNumArgs() const override { + return Impl.getMaxNumArgs(); + } }; template <typename T> diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -878,6 +878,8 @@ bool hasArmWideBranch(bool) const { return false; } + unsigned getMaxNumArgs() const { return UINT_MAX; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1194,6 +1194,10 @@ return TTIImpl->hasArmWideBranch(Thumb); } +unsigned TargetTransformInfo::getMaxNumArgs() const { + return TTIImpl->getMaxNumArgs(); +} + bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -77,6 +77,10 @@ return Options; } + unsigned getMaxNumArgs() const { + return 5; + } + }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -781,6 +781,7 @@ // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; + unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); for (Argument *PtrArg : PointerArgs) { // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. 
@@ -804,6 +805,7 @@ Types.push_back(Pair.second.Ty); if (areTypesABICompatible(Types, *F, TTI)) { + NumArgsAfterPromote += ArgParts.size() - 1; ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } } @@ -813,6 +815,9 @@ if (ArgsToPromote.empty()) return nullptr; + if (NumArgsAfterPromote > TTI.getMaxNumArgs()) + return nullptr; + return doPromotion(F, FAM, ArgsToPromote); } diff --git a/llvm/test/CodeGen/BPF/argumentpromotion.ll b/llvm/test/CodeGen/BPF/argumentpromotion.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/argumentpromotion.ll @@ -0,0 +1,145 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux -S %s | FileCheck %s +; Source: +; struct t { +; int a, b, c, d, e, f, g; +; }; +; __attribute__((noinline)) static int foo1(struct t *p1, struct t *p2, struct t *p3) { +; return p1->a + p1->b + p2->c + p2->e + p3->f + p3->g; +; } +; __attribute__((noinline)) static int foo2(struct t *p1, struct t *p2, struct t *p3) { +; return p1->a + p1->b + p2->c + p2->e + p3->f; +; } +; void init(void *); +; int bar(void) { +; struct t v1, v2, v3; +; init(&v1); init(&v2); init(&v3); +; return foo1(&v1, &v2, &v3) + foo2(&v1, &v2, &v3); +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm t.c -Xclang -disable-llvm-passes + +%struct.t = type { i32, i32, i32, i32, i32, i32, i32 } + +; Function Attrs: nounwind +define dso_local i32 @bar() #0 { +entry: + %v1 = alloca %struct.t, align 4 + %v2 = alloca %struct.t, align 4 + %v3 = alloca %struct.t, align 4 + call void @llvm.lifetime.start.p0(i64 28, ptr %v1) #4 + call void @llvm.lifetime.start.p0(i64 28, ptr %v2) #4 + call void @llvm.lifetime.start.p0(i64 28, ptr %v3) #4 + call void @init(ptr noundef %v1) + call void @init(ptr noundef %v2) + call void @init(ptr noundef %v3) + %call = call i32 @foo1(ptr noundef %v1, ptr noundef %v2, ptr noundef %v3) + %call1 = call i32 @foo2(ptr noundef %v1, ptr noundef %v2, ptr noundef %v3) + %add = add nsw i32 %call, %call1 + call void @llvm.lifetime.end.p0(i64 28, ptr %v3) #4 + call 
void @llvm.lifetime.end.p0(i64 28, ptr %v2) #4 + call void @llvm.lifetime.end.p0(i64 28, ptr %v1) #4 + ret i32 %add +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare dso_local void @init(ptr noundef) #2 + +; Function Attrs: noinline nounwind +define internal i32 @foo1(ptr noundef %p1, ptr noundef %p2, ptr noundef %p3) #3 { +entry: + %p1.addr = alloca ptr, align 8 + %p2.addr = alloca ptr, align 8 + %p3.addr = alloca ptr, align 8 + store ptr %p1, ptr %p1.addr, align 8, !tbaa !3 + store ptr %p2, ptr %p2.addr, align 8, !tbaa !3 + store ptr %p3, ptr %p3.addr, align 8, !tbaa !3 + %0 = load ptr, ptr %p1.addr, align 8, !tbaa !3 + %a = getelementptr inbounds %struct.t, ptr %0, i32 0, i32 0 + %1 = load i32, ptr %a, align 4, !tbaa !7 + %2 = load ptr, ptr %p1.addr, align 8, !tbaa !3 + %b = getelementptr inbounds %struct.t, ptr %2, i32 0, i32 1 + %3 = load i32, ptr %b, align 4, !tbaa !10 + %add = add nsw i32 %1, %3 + %4 = load ptr, ptr %p2.addr, align 8, !tbaa !3 + %c = getelementptr inbounds %struct.t, ptr %4, i32 0, i32 2 + %5 = load i32, ptr %c, align 4, !tbaa !11 + %add1 = add nsw i32 %add, %5 + %6 = load ptr, ptr %p2.addr, align 8, !tbaa !3 + %e = getelementptr inbounds %struct.t, ptr %6, i32 0, i32 4 + %7 = load i32, ptr %e, align 4, !tbaa !12 + %add2 = add nsw i32 %add1, %7 + %8 = load ptr, ptr %p3.addr, align 8, !tbaa !3 + %f = getelementptr inbounds %struct.t, ptr %8, i32 0, i32 5 + %9 = load i32, ptr %f, align 4, !tbaa !13 + %add3 = add nsw i32 %add2, %9 + %10 = load ptr, ptr %p3.addr, align 8, !tbaa !3 + %g = getelementptr inbounds %struct.t, ptr %10, i32 0, i32 6 + %11 = load i32, ptr %g, align 4, !tbaa !14 + %add4 = add nsw i32 %add3, %11 + ret i32 %add4 +} + +; CHECK: i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3) + +; Function Attrs: noinline nounwind +define internal i32 
@foo2(ptr noundef %p1, ptr noundef %p2, ptr noundef %p3) #3 { +entry: + %p1.addr = alloca ptr, align 8 + %p2.addr = alloca ptr, align 8 + %p3.addr = alloca ptr, align 8 + store ptr %p1, ptr %p1.addr, align 8, !tbaa !3 + store ptr %p2, ptr %p2.addr, align 8, !tbaa !3 + store ptr %p3, ptr %p3.addr, align 8, !tbaa !3 + %0 = load ptr, ptr %p1.addr, align 8, !tbaa !3 + %a = getelementptr inbounds %struct.t, ptr %0, i32 0, i32 0 + %1 = load i32, ptr %a, align 4, !tbaa !7 + %2 = load ptr, ptr %p1.addr, align 8, !tbaa !3 + %b = getelementptr inbounds %struct.t, ptr %2, i32 0, i32 1 + %3 = load i32, ptr %b, align 4, !tbaa !10 + %add = add nsw i32 %1, %3 + %4 = load ptr, ptr %p2.addr, align 8, !tbaa !3 + %c = getelementptr inbounds %struct.t, ptr %4, i32 0, i32 2 + %5 = load i32, ptr %c, align 4, !tbaa !11 + %add1 = add nsw i32 %add, %5 + %6 = load ptr, ptr %p2.addr, align 8, !tbaa !3 + %e = getelementptr inbounds %struct.t, ptr %6, i32 0, i32 4 + %7 = load i32, ptr %e, align 4, !tbaa !12 + %add2 = add nsw i32 %add1, %7 + %8 = load ptr, ptr %p3.addr, align 8, !tbaa !3 + %f = getelementptr inbounds %struct.t, ptr %8, i32 0, i32 5 + %9 = load i32, ptr %f, align 4, !tbaa !13 + %add3 = add nsw i32 %add2, %9 + ret i32 %add3 +} + +; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val) + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = 
!{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 48ad4296f7847db15b6bee7a465fc2cbe687ba6a)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"any pointer", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !9, i64 0} +!8 = !{!"t", !9, i64 0, !9, i64 4, !9, i64 8, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24} +!9 = !{!"int", !5, i64 0} +!10 = !{!8, !9, i64 4} +!11 = !{!8, !9, i64 8} +!12 = !{!8, !9, i64 16} +!13 = !{!8, !9, i64 20} +!14 = !{!8, !9, i64 24}