diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -817,7 +817,26 @@
     bool IsRecursive = C.size() > 1;
     for (LazyCallGraph::Node &N : C) {
       Function &OldF = N.getFunction();
-      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
+
+      // A function may carry !argpromotion metadata whose first operand is an
+      // integer constant that overrides the pass-wide element limit for that
+      // function only. An explicit limit of zero opts the function out of
+      // promotion. Absent or malformed metadata leaves MaxElements in effect,
+      // so functions without the annotation are handled as they were before.
+      unsigned MaxElems = MaxElements;
+      if (MDNode *MD = OldF.getMetadata("argpromotion")) {
+        if (MD->getNumOperands() > 0) {
+          if (auto *Limit = mdconst::dyn_extract_or_null<ConstantInt>(
+                  MD->getOperand(0))) {
+            // Clamp instead of truncating so a huge i64 cannot wrap to zero.
+            MaxElems = Limit->getLimitedValue(~0U);
+            if (MaxElems == 0)
+              continue;
+          }
+        }
+      }
+
+      Function *NewF = promoteArguments(&OldF, FAM, MaxElems, IsRecursive);
       if (!NewF)
         continue;
       LocalChange = true;
diff --git a/llvm/test/Transforms/ArgumentPromotion/arg-promotion-metadata.ll b/llvm/test/Transforms/ArgumentPromotion/arg-promotion-metadata.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/arg-promotion-metadata.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Tests that !argpromotion metadata allows the frontend to specify the maximum
+; number of elements we choose to split.
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+@constant = private constant %struct.Foo { i32 1, i32 2, i32 3, i32 4 }, align 4
+
+; Argument promotion defaults to only promoting 2 elements; with no metadata
+; we won't promote anything.
+define internal void @no_metadata(ptr noundef dereferenceable(4) align 4 %0) {
+; CHECK-LABEL: @no_metadata(
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP0:%.*]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 1
+  %5 = load i32, ptr %4, align 4
+  %6 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 2
+  %7 = load i32, ptr %6, align 4
+  %8 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 3
+  %9 = load i32, ptr %8, align 4
+  ret void
+}
+
+; When !argpromotion metadata raises the maximum to 4 elements, all four loaded
+; fields fit under the limit and the pointer argument is promoted.
+define internal void @metadata_4(ptr noundef dereferenceable(4) align 4 %0) !argpromotion !{i64 4} {
+; CHECK-LABEL: @metadata_4(
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 1
+  %5 = load i32, ptr %4, align 4
+  %6 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 2
+  %7 = load i32, ptr %6, align 4
+  %8 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 3
+  %9 = load i32, ptr %8, align 4
+  ret void
+}
+
+; The metadata requests a maximum of 3 elements, but this function loads 4
+; distinct fields, so nothing is promoted.
+define internal void @metadata_3(ptr noundef dereferenceable(4) align 4 %0) !argpromotion !{i64 3} {
+; CHECK-LABEL: @metadata_3(
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[TMP0:%.*]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 1
+  %5 = load i32, ptr %4, align 4
+  %6 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 2
+  %7 = load i32, ptr %6, align 4
+  %8 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 3
+  %9 = load i32, ptr %8, align 4
+  ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:    call void @no_metadata(ptr noundef align 4 dereferenceable(4) @constant)
+; CHECK-NEXT:    [[CONSTANT_VAL:%.*]] = load i32, ptr @constant, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr @constant, i64 4
+; CHECK-NEXT:    [[CONSTANT_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr @constant, i64 8
+; CHECK-NEXT:    [[CONSTANT_VAL2:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr @constant, i64 12
+; CHECK-NEXT:    [[CONSTANT_VAL3:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    call void @metadata_4(i32 [[CONSTANT_VAL]], i32 [[CONSTANT_VAL1]], i32 [[CONSTANT_VAL2]], i32 [[CONSTANT_VAL3]])
+; CHECK-NEXT:    call void @metadata_3(ptr noundef align 4 dereferenceable(4) @constant)
+; CHECK-NEXT:    ret i32 0
+;
+  call void @no_metadata(ptr noundef dereferenceable(4) align 4 @constant)
+  call void @metadata_4(ptr noundef dereferenceable(4) align 4 @constant)
+  call void @metadata_3(ptr noundef dereferenceable(4) align 4 @constant)
+  ret i32 0
+}