diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+
+define internal i32 @deref(i32* %x) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@deref
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %tmp2 = load i32, i32* %x, align 4 ; only use of %x; the expected output receives the loaded i32 as the argument instead
+  ret i32 %tmp2
+}
+
+define i32 @f(i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind ; expected output loads %x_addr in this caller and passes the i32 value to @deref
+  ret i32 %tmp1
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; PR2498
+
+; This test tries to convince argpromotion to promote the load from %A + 2,
+; because there is a load of %A in the entry block. It must not do so.
+define internal i32 @callee(i1 %C, i32* %A) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_0:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 [[A_0]]
+; CHECK:       F:
+; CHECK-NEXT:    [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[A_2]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  ; Unconditionally load the element at %A
+  %A.0 = load i32, i32* %A
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 %A.0
+
+F:
+  ; Load the element at offset two from %A. This load only runs on the F path and should not be promoted!
+  %A.2 = getelementptr i32, i32* %A, i32 2
+  %R = load i32, i32* %A.2
+  ret i32 %R
+}
+
+define i32 @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %X = call i32 @callee(i1 false, i32* null)             ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -inline -argpromotion -maxar=0 -disable-output
+
+define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind {
+entry:
+  unreachable
+}
+
+define void @encode(i32* %m, i32* %ts, i32* %new) nounwind {
+entry:
+  %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind		; <i32> [#uses=0]
+  unreachable
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -disable-output
+
+define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind {
+entry:
+  br i1 false, label %bb, label %bb5
+
+bb:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind		; <i32> [#uses=0]
+  unreachable
+
+bb5:		; preds = %entry
+  ret i32 0
+}
+
+define i32 @term_Sharing(i32* %Term) nounwind {
+entry:
+  br i1 false, label %bb.i, label %bb14
+
+bb.i:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind		; <i32> [#uses=0]
+  ret i32 1
+
+bb14:		; preds = %entry
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
+
+define internal i32 @test(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 ; field 3 is read first here, but the expected signature lists field 2 before field 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b ; only fields 2 and 3 of %T are used, so the expected output passes just those two i32 values
+  ret i32 %v
+}
+
+define i32 @caller() {
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2
+; CHECK-NEXT:    [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]]
+; CHECK-NEXT:    [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3
+; CHECK-NEXT:    [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]]
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @test(%T* @G)
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=0 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <4 x i64> [[TMP]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1 ; expected to stay a pointer load: the caller @no_promote uses attribute group #1, which lacks the +avx2 target feature of #0
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @no_promote(<4 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@no_promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1 ; expected to be promoted to a by-value <4 x i64>: the caller @promote shares attribute group #0
+  store <4 x i64> %tmp, <4 x i64>* %arg ; %arg is written through, and the expected signature keeps it as a pointer
+  ret void
+}
+
+define void @promote(<4 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+%struct.ss = type { i32, i64 }
+
+; Don't drop 'byval' on %X here.
+define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; %b is byval; the expected output rebuilds it in a local alloca from two scalar arguments (i32, i64)
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
+
+  store i32 0, i32* %X ; %X is expected to stay a pointer argument and keep its byval attribute
+  ret void
+}
+
+; Also make sure we don't drop the call zeroext attribute.
+define i32 @test(i32* %X) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+
+  call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0)
+
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s  -basicaa -argpromotion -maxar=0 -mem2reg -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @test(i32* %X, i32* %Y) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define internal i32 @caller(i32* %B) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32 [[B_VAL1:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A ; this test also runs mem2reg, and the expected call passes the stored constant 1 directly
+  %C = call i32 @test(i32* %A, i32* %B) ; %B is itself an argument of this internal function; the expected signature takes it by value as well
+  ret i32 %C
+}
+
+define i32 @callercaller() {
+; CHECK-LABEL: define {{[^@]+}}@callercaller()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 2)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %B = alloca i32
+  store i32 2, i32* %B
+  %X = call i32 @caller(i32* %B)
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+; Arg promotion eliminates the struct argument.
+; FIXME: We should eliminate the i32* argument.
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
+
+  store i32 0, i32* %X
+  ret void
+}
+
+define i32 @test(i32* %X) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval %S, i32* byval %X)
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b) nounwind  {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
+  ret void
+}
+
+
+define internal void @g(%struct.ss* byval align 32 %b) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; %b is byval align 32; the expected output replaces it with a local alloca that keeps align 32
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; this write lands in the callee-local copy in the expected output
+  ret void
+}
+
+
+define i32 @main() nounwind  {
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
+; CHECK-NEXT:    [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4
+; CHECK-NEXT:    [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
+; CHECK-NEXT:    call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+  call void @f(%struct.ss* byval %S) nounwind
+  call void @g(%struct.ss* byval %S) nounwind
+  ret i32 0
+}
+
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+@G1 = constant i32 0
+@G2 = constant i32* @G1
+
+define internal i32 @test(i32** %x) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL_VAL]]
+;
+entry:
+  %y = load i32*, i32** %x ; first-level load of the pointer-to-pointer argument
+  %z = load i32, i32* %y ; second-level load through the loaded pointer; the expected output folds both loads into a single i32 argument
+  ret i32 %z
+}
+
+define i32 @caller() {
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G2_VAL:%.*]] = load i32*, i32** @G2
+; CHECK-NEXT:    [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %x = call i32 @test(i32** @G2)
+  ret i32 %x
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+; Don't promote around control flow.
+define internal i32 @callee(i1 %C, i32* %P) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 17
+
+F:
+  %X = load i32, i32* %P ; load runs only on the F path; the caller in this file passes null, and the expected output keeps %P as a pointer
+  ret i32 %X
+}
+
+define i32 @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 true, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %X = call i32 @callee(i1 true, i32* null)
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @callee(i1 %C, i32* %P) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 [[P_VAL]]
+;
+  br i1 %C, label %T, label %F
+
+T:              ; preds = %0
+  ret i32 17
+
+F:              ; preds = %0
+  %X = load i32, i32* %P               ; <i32> [#uses=1]; conditional load, yet the expected output promotes %P (the caller in this file passes an alloca)
+  ret i32 %X
+}
+
+define i32 @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 17, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %A = alloca i32         ; <i32*> [#uses=2]
+  store i32 17, i32* %A
+  %X = call i32 @callee( i1 false, i32* %A )              ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S < %s -inline -argpromotion -maxar=0 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM
+; RUN: opt -S < %s -passes=inline,argpromotion -maxar=0 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM
+
+%S = type { %S* }
+
+; Inlining should nuke the invoke (and any inlined calls) here even with
+; argument promotion running along with it.
+define void @zot() personality i32 (...)* @wibble {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       hoge.exit:
+; ARGPROMOTION-NEXT:    br label [[BB1:%.*]]
+; ARGPROMOTION:       bb1:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       bb2:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = landingpad { i8*, i32 }
+; ARGPROMOTION-NEXT:    cleanup
+; ARGPROMOTION-NEXT:    unreachable
+;
+bb:
+  invoke void @hoge()
+  to label %bb1 unwind label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp = landingpad { i8*, i32 }
+  cleanup
+  unreachable
+}
+
+define internal void @hoge() {
+bb:
+  %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs)
+  %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney)
+  unreachable
+}
+
+define internal fastcc i8* @spam(i1 (i8*)* %arg) {
+bb:
+  unreachable
+}
+
+define internal i1 @eggs(i8* %arg) {
+; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs()
+; ALL_NEWPM-NEXT:  bb:
+; ALL_NEWPM-NEXT:    unreachable
+;
+bb:
+  %tmp = call zeroext i1 @barney(i8* %arg)
+  unreachable
+}
+
+define internal i1 @barney(i8* %arg) {
+bb:
+  ret i1 undef
+}
+
+define i32 @test_inf_promote_caller(i32 %arg) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller
+; ARGPROMOTION-SAME: (i32 [[ARG:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]]
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = alloca [[S]]
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = alloca %S
+  %tmp1 = alloca %S
+  %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1)
+
+  ret i32 0
+}
+
+define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee
+; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = load %S*, %S** [[TMP]]
+; ARGPROMOTION-NEXT:    [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP4:%.*]] = load %S*, %S** [[TMP3]]
+; ARGPROMOTION-NEXT:    [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0
+  %tmp2 = load %S*, %S** %tmp
+  %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0
+  %tmp4 = load %S*, %S** %tmp3
+  %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2)
+
+  ret i32 0
+}
+
+declare i32 @wibble(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+declare void @sink(i32)
+
+define internal void @test(i32** %X) !dbg !2 {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3
+; CHECK-NEXT:    call void @sink(i32 [[X_VAL_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %1 = load i32*, i32** %X, align 8
+  %2 = load i32, i32* %1, align 8
+  call void @sink(i32 %2)
+  ret void
+}
+
+%struct.pair = type { i32, i32 }
+
+define internal void @test_byval(%struct.pair* byval %P) {
+; CHECK-LABEL: define {{[^@]+}}@test_byval
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller(i32** %Y, %struct.pair* %P) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]])
+; CHECK-NEXT:    [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4
+; CHECK-NEXT:    [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4
+; CHECK-NEXT:    call void @test(i32 [[Y_VAL_VAL]]), !dbg !4
+; CHECK-NEXT:    [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5
+; CHECK-NEXT:    [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5
+; CHECK-NEXT:    [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5
+; CHECK-NEXT:    call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @test(i32** %Y), !dbg !1
+
+  call void @test_byval(%struct.pair* %P), !dbg !6
+  ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!3}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 8, scope: !2)
+!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5)
+!5 = !DIFile(filename: "test.c", directory: "")
+!6 = !DILocation(line: 9, scope: !2)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.u = type { x86_fp80 }
+%struct.s = type { double, i16, i8, [5 x i8] }
+
+@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
+
+%struct.Foo = type { i32, i64 }
+@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
+
+define void @run() {
+; CHECK-LABEL: define {{[^@]+}}@run()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0
+; CHECK-NEXT:    [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+  call i64 @CaptureAStruct(%struct.Foo* @a)
+  ret void
+}
+
+define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely
+; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s*
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2
+; CHECK-NEXT:    [[RESULT:%.*]] = load i8, i8* [[GEP]]
+; CHECK-NEXT:    ret i8 [[RESULT]]
+;
+entry:
+  %bitcast = bitcast %union.u* %arg to %struct.s*
+  %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2
+  %result = load i8, i8* %gep
+  ret i8 %result
+}
+
+define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) {
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
+; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]])
+; CHECK-NEXT:    [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
+; CHECK-NEXT:    store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]]
+; CHECK-NEXT:    ret x86_fp80 [[FP80]]
+;
+  %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
+  %fp80 = load x86_fp80, x86_fp80* %gep
+  ret x86_fp80 %fp80
+}
+
+define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
+; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64*
+; CHECK-NEXT:    [[V:%.*]] = load i64, i64* [[P]]
+; CHECK-NEXT:    ret i64 [[V]]
+;
+  %p = bitcast %struct.Foo* %a to i64*
+  %v = load i64, i64* %p
+  ret i64 %v
+}
+
+define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
+; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]]
+; CHECK-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  %a_ptr = alloca %struct.Foo*
+  br label %loop
+
+loop:
+  %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ]
+  %0   = phi %struct.Foo* [ %a, %entry ],   [ %0, %loop ]
+  store %struct.Foo* %phi, %struct.Foo** %a_ptr
+  %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0
+  br label %loop
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -globalopt -argpromotion -maxar=0 -sroa -S | FileCheck %s
+; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=0 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i32 }
+
+; Argpromote + sroa should change this to passing the two integers by value.
+define internal i32 @f(%struct.ss* inalloca  %s) {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
+  %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1
+  %a = load i32, i32* %f0, align 4
+  %b = load i32, i32* %f1, align 4
+  %r = add i32 %a, %b
+  ret i32 %r
+}
+
+define i32 @main() {
+; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i32 1, i32* %f0, align 4
+  store i32 2, i32* %f1, align 4
+  %r = call i32 @f(%struct.ss* inalloca %S)
+  ret i32 %r
+}
+
+; Argpromote can't promote %a because of the icmp use.
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind  {
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+entry:
+  %c = icmp eq %struct.ss* %a, %b
+  ret i1 %c
+}
+
+define i32 @test() {
+; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; Check that when argument promotion changes a function in some parent node of
+; the call graph, any analyses that happened to be cached for that function are
+; actually invalidated. We are using `demanded-bits` here because when printed
+; it will end up caching a value for every instruction, making it easy to
+; detect the instruction-level changes that will fail here. With improper
+; invalidation this will crash in the second printer as it tries to reuse
+; now-invalid demanded bits.
+;
+; RUN: opt < %s -passes='function(print<demanded-bits>),cgscc(argpromotion,function(print<demanded-bits>))' -maxar=0 -S | FileCheck %s
+
+@G = constant i32 0
+
+define internal i32 @a(i32* %x) {
+; CHECK-LABEL: define {{[^@]+}}@a
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %v = load i32, i32* %x
+  ret i32 %v
+}
+
+define i32 @b() {
+; CHECK-LABEL: define {{[^@]+}}@b()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @a(i32* @G)
+  ret i32 %v
+}
+
+define i32 @c() {
+; CHECK-LABEL: define {{[^@]+}}@c()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    [[V2:%.*]] = call i32 @b()
+; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+entry:
+  %v1 = call i32 @a(i32* @G)
+  %v2 = call i32 @b()
+  %result = add i32 %v1, %v2
+  ret i32 %result
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll
@@ -0,0 +1,387 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=0 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote: caller (#0) and callee (#1) differ only in prefer-vector-width.
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote: caller (#1) and callee (#0) differ only in prefer-vector-width.
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote: caller (#2) has min-legal-vector-width=256 but callee (#1) has 512.
+define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote: caller (#1) has min-legal-vector-width=512 but callee (#2) has 256.
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
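+; This should promote: min-legal-vector-width differs (caller #4: 256, callee #3: 512), but target-features is "+avx2" for both.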
+define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
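+; This should promote: min-legal-vector-width differs (caller #3: 512, callee #4: 256), but target-features is "+avx2" for both.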
+define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; If the arguments are scalar, it's OK to promote even though the caller (#2) and callee (#1) disagree on min-legal-vector-width.
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #1 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; If the arguments are scalar, it's OK to promote even though the caller (#1) and callee (#2) disagree on min-legal-vector-width.
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #1 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
+attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #5 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; PR36543
+
+; Don't promote arguments of musttail callee
+
+%T = type { i32, i32, i32, i32 }
+
+define internal i32 @test(%T* %p) { ; callee of the musttail call in @caller; the checks expect %p to stay a %T* parameter
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 ; address of field 3 of %T
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 ; address of field 2 of %T
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b ; result is field 3 + field 2
+  ret i32 %v
+}
+
+define i32 @caller(%T* %p) { ; forwards its own argument to @test through a musttail call
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = musttail call i32 @test(%T* [[P]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = musttail call i32 @test(%T* %p) ; the checks expect this call to be left exactly as written
+  ret i32 %v
+}
+
+; Don't promote arguments of musttail caller
+
+define i32 @foo(%T* %p, i32 %v) { ; externally visible target of the musttail call in @test2; ignores both arguments
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
+  ret i32 0
+}
+
+define internal i32 @test2(%T* %p, i32 %p2) { ; internal function that itself ends in a musttail call; the checks expect its (%T*, i32) signature to be kept
+; CHECK-LABEL: define {{[^@]+}}@test2
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]])
+; CHECK-NEXT:    ret i32 [[CA]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 ; address of field 3 of %T
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 ; address of field 2 of %T
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  %ca = musttail call i32 @foo(%T* undef, i32 %v) ; musttail call whose prototype matches @test2's own (%T*, i32) -> i32
+  ret i32 %ca
+}
+
+define i32 @caller2(%T* %g) { ; ordinary (non-musttail) caller of @test2
+; CHECK-LABEL: define {{[^@]+}}@caller2
+; CHECK-SAME: (%T* [[G:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0)
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = call i32 @test2(%T* %g, i32 0) ; the checks expect the pointer argument to be passed through unchanged
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+
+; Don't promote parameters of/arguments to naked functions
+
+@g = common global i32 0, align 4
+
+define i32 @bar() { ; caller that passes the address of global @g to the naked function @foo
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32* @g)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+entry:
+  %call = call i32 @foo(i32* @g) ; the checks expect the i32* argument to remain at the call site
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) #0 { ; #0 is { naked } in this file; the checks expect the unnamed i32* parameter to be kept
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (i32* [[TMP0:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4 ; not referenced by any later instruction in this body
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
+
+attributes #0 = { naked }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+
+; ArgumentPromotion should preserve the default function address space
+; from the data layout.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+
+@g = common global i32 0, align 4
+
+define i32 @bar() { ; caller of @foo; the datalayout in this file contains P1 and the checks expect addrspace(1) on the define
+; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call addrspace(1) i32 @foo()
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+
+entry:
+  %call = call i32 @foo(i32* @g) ; the checks expect the @g argument to be dropped and the call to carry addrspace(1)
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) { ; never reads its unnamed pointer parameter; the checks expect an empty parameter list and addrspace(1)
+; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4 ; not referenced by any later instruction in this body
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=0 < %s | FileCheck %s
+; RUN: opt -S -maxar=0 -debugify -o /dev/null < %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) { ; never uses its unnamed i8* parameter; the checks expect an empty parameter list
+; CHECK-LABEL: define {{[^@]+}}@callee()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @thunk()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @thunk()
+  ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 { ; calls @callee from inside a cleanup funclet
+; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @thunk()
+; CHECK-NEXT:    to label [[OUT:%.*]] unwind label [[CPAD:%.*]]
+; CHECK:       out:
+; CHECK-NEXT:    ret void
+; CHECK:       cpad:
+; CHECK-NEXT:    [[PAD:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    call void @callee() [ "funclet"(token [[PAD]]) ]
+; CHECK-NEXT:    cleanupret from [[PAD]] unwind to caller
+;
+entry:
+  invoke void @thunk()
+  to label %out unwind label %cpad
+
+out:
+  ret void
+
+cpad:
+  %pad = cleanuppad within none []
+  call void @callee(i8* null) [ "funclet"(token %pad) ] ; the checks expect the null argument to be removed while the "funclet" bundle stays attached
+  cleanupret from %pad unwind to caller
+}
+
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; PR 32917
+
+@b = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+
+define i32 @fn2() local_unnamed_addr { ; builds a pointer from the integer stored in @b and passes it to @fn1
+; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1
+; CHECK-NEXT:    [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4
+; CHECK-NEXT:    call fastcc void @fn1(i32 [[DOTIDX_VAL]])
+; CHECK-NEXT:    ret i32 undef
+;
+  %1 = load i32, i32* @b, align 4
+  %2 = sext i32 %1 to i64
+  %3 = inttoptr i64 %2 to i32* ; pointer value derived from the contents of @b
+  call fastcc void @fn1(i32* %3) ; the checks expect a gep by -1 and a load to be inserted here, with the loaded i32 passed instead
+  ret i32 undef
+}
+
+define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr { ; copies the i32 located one element before its argument into @a
+; CHECK-LABEL: define {{[^@]+}}@fn1
+; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:    store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 -1 ; negative index; the expected promoted-value name above spells it as 18446744073709551615
+  %3 = load i32, i32* %2, align 4
+  store i32 %3, i32* @a, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=0 -verify -dse -S %s -o - | FileCheck %s
+
+; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to
+; dbg.value which still used the removed argument.
+
+; The %p argument should be removed, and the use of it in dbg.value should be
+; changed to undef.
+
+%p_t = type i16*
+%fun_t = type void (%p_t)*
+
+define void @foo() { ; stores the address of @bar into a local slot; the checks expect only `ret void` to remain
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca %fun_t
+  store %fun_t @bar, %fun_t* %tmp ; only reference to @bar visible in this test; presumably removed by the -dse run on the RUN line
+  ret void
+}
+
+define internal void @bar(%p_t %p)  { ; %p is referenced only by the dbg.value below; the checks expect an empty parameter list
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6 ; the checks expect the %p operand to become undef
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "bar", unit: !0)
+!4 = !DILocalVariable(name: "p", scope: !3)
+!5 = !DIExpression()
+!6 = !DILocation(line: 1, column: 1, scope: !3)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=0 -mem2reg -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Checks if !prof metadata is correct in argpromotion.
+
+define void @caller() #0 { ; passes the address of a local holding 42 to @promote_i32_ptr, with !prof attached to the call
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:    call void @promote_i32_ptr(i32 42), !prof !0
+; CHECK-NEXT:    ret void
+;
+  %x = alloca i32
+  store i32 42, i32* %x
+  call void @promote_i32_ptr(i32* %x), !prof !0 ; the checks expect the constant 42 to be passed directly and !prof !0 to survive
+  ret void
+}
+
+define internal void @promote_i32_ptr(i32* %xp) { ; loads through %xp and forwards the value to @use_i32; the checks expect an i32 parameter instead
+; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr
+; CHECK-SAME: (i32 [[XP_VAL:%.*]])
+; CHECK-NEXT:    call void @use_i32(i32 [[XP_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %x = load i32, i32* %xp ; the only use of %xp
+  call void @use_i32(i32 %x)
+  ret void
+}
+
+declare void @use_i32(i32)
+
+!0 = !{!"branch_weights", i32 30}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+; PR17906
+; When we promote two arguments in a single function with different types,
+; before the fix, we used the same tag for the newly-created two loads.
+; This testing case makes sure that we correctly transfer the tbaa tags from the
+; original loads to the newly-created loads when promoting pointer arguments.
+
+@a = global i32* null, align 8
+@e = global i32** @a, align 8
+@g = global i32 0, align 4
+@c = global i64 0, align 8
+@d = global i8 0, align 1
+
+define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { ; loads through both pointers with different !tbaa tags; the checks expect (i32, i64) value parameters
+; CHECK-LABEL: define {{[^@]+}}@fn
+; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], i8* @d, align 1, !tbaa !0
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i64, i64* %p2, align 8, !tbaa !1 ; tagged with !1, whose base type !2 is "long"
+  %conv = trunc i64 %0 to i32 ; result is not used by any later instruction
+  %1 = load i32, i32* %p1, align 4, !tbaa !5 ; tagged with !5, whose base type !6 is "int"
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, i8* @d, align 1, !tbaa !7
+  ret void
+}
+
+define i32 @main() { ; writes through @e and @a, then calls @fn with the addresses of @g and @c
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3
+; CHECK-NEXT:    store i32* @g, i32** [[TMP0]], align 8, !tbaa !3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 4, !tbaa !5
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5
+; CHECK-NEXT:    [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7
+; CHECK-NEXT:    call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %0 = load i32**, i32*** @e, align 8, !tbaa !8
+  store i32* @g, i32** %0, align 8, !tbaa !8
+  %1 = load i32*, i32** @a, align 8, !tbaa !8
+  store i32 1, i32* %1, align 4, !tbaa !5
+  call fastcc void @fn(i32* @g, i64* @c) ; the checks expect loads of @g and @c before this call, each with its own distinct !tbaa tag
+
+  ret i32 0
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
+!7 = !{!3, !3, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !3, i64 0}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @add({i32, i32}* %this, i32* sret %r) { ; stores the sum of both fields of %this into %r; the checks expect %this split into two i32s and %r marked noalias instead of sret
+; CHECK-LABEL: define {{[^@]+}}@add
+; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]])
+; CHECK-NEXT:    [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]]
+; CHECK-NEXT:    store i32 [[AB]], i32* [[R]]
+; CHECK-NEXT:    ret void
+;
+  %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0 ; address of field 0
+  %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1 ; address of field 1
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %ab = add i32 %a, %b
+  store i32 %ab, i32* %r
+  ret void
+}
+
+define void @f() { ; calls @add with a stack-allocated pair and a stack-allocated sret result slot
+; CHECK-LABEL: define {{[^@]+}}@f()
+; CHECK-NEXT:    [[R:%.*]] = alloca i32
+; CHECK-NEXT:    [[PAIR:%.*]] = alloca { i32, i32 }
+; CHECK-NEXT:    [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0
+; CHECK-NEXT:    [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]]
+; CHECK-NEXT:    [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1
+; CHECK-NEXT:    [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]]
+; CHECK-NEXT:    call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]])
+; CHECK-NEXT:    ret void
+;
+  %r = alloca i32 ; result slot passed as the sret argument
+  %pair = alloca {i32, i32} ; never initialised in this function
+
+  call void @add({i32, i32}* %pair, i32* sret %r) ; the checks expect both fields of %pair to be loaded here and passed by value
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -argpromotion -maxar=0 -S -o - | FileCheck %s
+; RUN: opt %s -passes=argpromotion -maxar=0 -S -o - | FileCheck %s
+; PR14710
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%pair = type { i32, i32 }
+
+declare i8* @foo(%pair*)
+
+define internal void @bar(%pair* byval %Data) { ; forwards its byval struct to @foo; the checks expect two i32 parameters and a local %pair rebuilt from them
+; CHECK-LABEL: define {{[^@]+}}@bar
+; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]])
+; CHECK-NEXT:    [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[DATA_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[DATA_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
+; CHECK-NEXT:    ret void
+;
+  tail call i8* @foo(%pair* %Data) ; the expected output above has a plain `call` here, without the `tail` marker
+  ret void
+}
+
+define void @zed(%pair* byval %Data) { ; externally visible caller of @bar; the checks expect its own byval signature to be unchanged
+; CHECK-LABEL: define {{[^@]+}}@zed
+; CHECK-SAME: (%pair* byval [[DATA:%.*]])
+; CHECK-NEXT:    [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4
+; CHECK-NEXT:    [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4
+; CHECK-NEXT:    call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar(%pair* byval %Data) ; the checks expect both fields to be loaded here and passed as separate i32 arguments
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; In PR41658, argpromotion put an inalloca in a position that per the
+; calling convention is passed in a register. This test verifies that
+; we don't do that anymore. It also verifies that the combination of
+; globalopt and argpromotion is able to optimize the call safely.
+;
+; RUN: opt -S -argpromotion -maxar=0 %s | FileCheck %s --check-prefix=ARGPROMOTION
+; RUN: opt -S -globalopt -argpromotion -maxar=0 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i8 }
+
+define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { ; %this is never used; the argpromotion-only run expects the signature kept, the globalopt run expects %this and inalloca dropped
+; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]])
+; ARGPROMOTION-NEXT:  entry:
+; ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:  entry:
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+entry:
+  %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 ; the struct.a held inside the incoming inalloca block
+  %argmem = alloca inalloca <{ %struct.a }>, align 4 ; fresh argument block for the call to @ext
+  %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
+  %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a) ; copy-constructs the incoming object into the new block
+  call void @ext(<{ %struct.a }>* inalloca %argmem)
+  ret void
+}
+
+; This is here to ensure @internalfun is live.
+define void @exportedfun(%struct.a* %a) { ; externally visible caller of @internalfun using an inalloca argument block
+; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]])
+; ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; ARGPROMOTION-NEXT:    call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+  %inalloca.save = tail call i8* @llvm.stacksave() ; saved stack pointer, restored after the call below
+  %argmem = alloca inalloca <{ %struct.a }>, align 4 ; argument block; not initialised in this function
+  call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem) ; the globalopt run expects this to become a fastcc call without %a or the inalloca marker
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  ret void
+}
+
+declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
+declare void @ext(<{ %struct.a }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s
+
+; Unused arguments from variadic functions cannot be eliminated as that changes
+; their classification according to the SysV amd64 ABI. Clang and other frontends
+; bake in the classification when they use things like byval, as in this test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.tt0 = type { i64, i64 }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { ; calls the variadic @callee_t0f with five undef pointers and a byval struct in the variadic position
+; CHECK-LABEL: define {{[^@]+}}@main
+; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) ; the checks expect this call to be left exactly as written
+  ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { ; variadic and uses none of its parameters; the checks expect all five to be kept
+; CHECK-LABEL: define {{[^@]+}}@callee_t0f
+; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+
+define internal i32 @deref(i32* %x) nounwind { ; returns the i32 that %x points to; the checks expect an i32 parameter returned directly
+; CHECK-LABEL: define {{[^@]+}}@deref
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %tmp2 = load i32, i32* %x, align 4 ; the only use of %x
+  ret i32 %tmp2
+}
+
+define i32 @f(i32 %x) { ; spills %x to a stack slot and passes the slot's address to @deref
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind ; the checks expect a load of %x_addr before this call, with the value passed instead
+  ret i32 %tmp1
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; PR2498
+
+; This test tries to convince argpromotion to promote the load from %A + 2,
+; because there is a load of %A in the entry block
+define internal i32 @callee(i1 %C, i32* %A) { ; loads %A[0] on every path and %A[2] only when %C is false; the checks expect %A to stay a pointer
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_0:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 [[A_0]]
+; CHECK:       F:
+; CHECK-NEXT:    [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[A_2]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  ; Unconditionally load the element at %A
+  %A.0 = load i32, i32* %A
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 %A.0
+
+F:
+  ; Load the element at offset two from %A. This should not be promoted!
+  %A.2 = getelementptr i32, i32* %A, i32 2
+  %R = load i32, i32* %A.2 ; executed only on the %F path
+  ret i32 %R
+}
+
+define i32 @foo() { ; calls @callee with a false condition and a null pointer; the checks expect the call to be unchanged
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %X = call i32 @callee(i1 false, i32* null)             ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -inline -argpromotion -maxar=3 -disable-output
+
+define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { ; body is a single unreachable; neither parameter is used
+entry:
+  unreachable
+}
+
+define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { ; sole visible caller of @hash; this test's RUN line uses -disable-output, so there are no output checks
+entry:
+  %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind		; <i32> [#uses=0]
+  unreachable
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -disable-output
+
+define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { ; internal function that contains a call to itself
+entry:
+  br i1 false, label %bb, label %bb5 ; constant-false condition, so control always goes to %bb5
+
+bb:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind		; <i32> [#uses=0]
+  unreachable
+
+bb5:		; preds = %entry
+  ret i32 0
+}
+
+define i32 @term_Sharing(i32* %Term) nounwind { ; externally visible caller of @term_SharingList; %Term itself is never used
+entry:
+  br i1 false, label %bb.i, label %bb14 ; constant-false condition, so control always goes to %bb14
+
+bb.i:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind		; <i32> [#uses=0]
+  ret i32 1
+
+bb14:		; preds = %entry
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
+
+define internal i32 @test(%T* %p) { ; returns field 3 + field 2 of *%p; the checks expect two i32 parameters in place of the pointer
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 ; address of field 3 of %T
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 ; address of field 2 of %T
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  ret i32 %v
+}
+
+define i32 @caller() { ; calls @test on the constant global @G
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2
+; CHECK-NEXT:    [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]]
+; CHECK-NEXT:    [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3
+; CHECK-NEXT:    [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]]
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @test(%T* @G) ; the checks expect fields 2 and 3 of @G to be loaded here and passed by value
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=3 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; copies *%arg1 into *%arg; group #0 sets "target-features"="+avx2"
+; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <4 x i64> [[TMP]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1 ; expected to stay in the callee: the caller in this file, @no_promote, uses group #1 (no "+avx2")
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @no_promote(<4 x i64>* %arg) #1 { ; caller whose attribute group #1 has no "target-features" entry
+; CHECK-LABEL: define {{[^@]+}}@no_promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) ; zero all 32 bytes of %tmp
+  call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) ; expected to remain a pointer-passing call
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; same body as @no_promote_avx2, but its caller @promote also uses group #0
+; CHECK-LABEL: define {{[^@]+}}@promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1 ; expected to vanish: %arg1 becomes a by-value <4 x i64> parameter
+  store <4 x i64> %tmp, <4 x i64>* %arg ; %arg is written through, and is expected to stay a pointer
+  ret void
+}
+
+define void @promote(<4 x i64>* %arg) #0 { ; caller sharing attribute group #0 ("+avx2") with @promote_avx2
+; CHECK-LABEL: define {{[^@]+}}@promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) ; zero all 32 bytes of %tmp
+  call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) ; expected to be preceded by a load of %tmp whose value is passed instead of the pointer
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+%struct.ss = type { i32, i64 }
+
+; Don't drop 'byval' on %X here.
+define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; expected: %b is split into its i32 and i64 fields while %X keeps 'byval'
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of field 0 of the byval struct
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments field 0 in place
+
+  store i32 0, i32* %X ; %X is only written, never loaded
+  ret void
+}
+
+; Also make sure we don't drop the call zeroext attribute.
+define i32 @test(i32* %X) { ; builds a %struct.ss {1, 2} on the stack and passes it byval to @f
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+
+  call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0) ; the rewritten call is expected to keep 'zeroext' on the last argument
+
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -basicaa -argpromotion -maxar=3 -mem2reg -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @test(i32* %X, i32* %Y) { ; returns *%X + *%Y; both pointers are expected to become i32 values
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X ; only use of %X
+  %B = load i32, i32* %Y ; only use of %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define internal i32 @caller(i32* %B) { ; internal middle link of the call chain; its own %B is expected to become an i32 as well
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32 [[B_VAL1:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A ; expected output passes the constant 1 directly (the RUN line also runs -mem2reg)
+  %C = call i32 @test(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+define i32 @callercaller() { ; external root of the chain: stores 2 to a stack slot and passes its address to @caller
+; CHECK-LABEL: define {{[^@]+}}@callercaller()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 2)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %B = alloca i32
+  store i32 2, i32* %B
+  %X = call i32 @caller(i32* %B) ; expected to collapse to a call with the constant 2
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+; Arg promotion eliminates the struct argument.
+; FIXME: We should eliminate the i32* argument.
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  { ; expected: %b is split into (i32, i64); %X remains 'i32* byval'
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of field 0 of the byval struct
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments field 0 in place
+
+  store i32 0, i32* %X ; %X is only stored to
+  ret void
+}
+
+define i32 @test(i32* %X) { ; builds a %struct.ss {1, 2} on the stack and passes it, plus %X, byval to @f
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval %S, i32* byval %X) ; expected: both fields of %S are loaded here and passed by value
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b) nounwind  { ; byval without explicit alignment; the local copy is expected to be 'align 4'
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of field 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments field 0 of the callee's copy
+  ret void
+}
+
+
+define internal void @g(%struct.ss* byval align 32 %b) nounwind { ; same body as @f, but with 'align 32'; the local copy is expected to keep that alignment
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of field 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments field 0 of the callee's copy
+  ret void
+}
+
+
+define i32 @main() nounwind  { ; builds a %struct.ss {1, 2} and passes it byval to @f and then to @g
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
+; CHECK-NEXT:    [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4
+; CHECK-NEXT:    [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
+; CHECK-NEXT:    call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+  call void @f(%struct.ss* byval %S) nounwind ; expected: both fields are loaded and passed by value
+  call void @g(%struct.ss* byval %S) nounwind ; expected: the fields are re-loaded for this second call
+  ret i32 0
+}
+
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+@G1 = constant i32 0
+@G2 = constant i32* @G1
+
+define internal i32 @test(i32** %x) { ; dereferences %x twice; expected to receive the final i32 directly
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL_VAL]]
+;
+entry:
+  %y = load i32*, i32** %x ; first level: the inner pointer
+  %z = load i32, i32* %y ; second level: the i32 it points to
+  ret i32 %z
+}
+
+define i32 @caller() { ; passes @G2 (a constant holding the address of @G1) to @test
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G2_VAL:%.*]] = load i32*, i32** @G2
+; CHECK-NEXT:    [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %x = call i32 @test(i32** @G2) ; expected: both loads of the chain are performed here, before the call
+  ret i32 %x
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+; Don't promote around control flow.
+define internal i32 @callee(i1 %C, i32* %P) { ; returns 17 when %C is true, otherwise *%P; expected to be left unchanged
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 17
+
+F:
+  %X = load i32, i32* %P ; the only load of %P, reached only when %C is false
+  ret i32 %X
+}
+
+define i32 @foo() { ; calls @callee with %C = true and a null pointer
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 true, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %X = call i32 @callee(i1 true, i32* null) ; expected to stay as written: no load of null is hoisted into the caller
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @callee(i1 %C, i32* %P) { ; returns 17 when %C is true, otherwise *%P; here %P is expected to become an i32 (presumably because the caller passes an alloca - confirm)
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 [[P_VAL]]
+;
+  br i1 %C, label %T, label %F
+
+T:              ; preds = %0
+  ret i32 17
+
+F:              ; preds = %0
+  %X = load i32, i32* %P               ; <i32> [#uses=1]
+  ret i32 %X
+}
+
+define i32 @foo() { ; stores 17 to a stack slot and calls @callee with %C = false; a load of %A is expected before the call
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 17, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %A = alloca i32         ; <i32*> [#uses=2]
+  store i32 17, i32* %A
+  %X = call i32 @callee( i1 false, i32* %A )              ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S < %s -inline -argpromotion -maxar=3 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM
+; RUN: opt -S < %s -passes=inline,argpromotion -maxar=3 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM
+
+%S = type { %S* }
+
+; Inlining should nuke the invoke (and any inlined calls) here even with
+; argument promotion running along with it.
+define void @zot() personality i32 (...)* @wibble { ; invokes the internal @hoge; the expected output has no invoke left in it
+; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       hoge.exit:
+; ARGPROMOTION-NEXT:    br label [[BB1:%.*]]
+; ARGPROMOTION:       bb1:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       bb2:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = landingpad { i8*, i32 }
+; ARGPROMOTION-NEXT:    cleanup
+; ARGPROMOTION-NEXT:    unreachable
+;
+bb:
+  invoke void @hoge() ; expected to be inlined away, leaving bb as a bare 'unreachable'
+  to label %bb1 unwind label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp = landingpad { i8*, i32 } ; cleanup-only landing pad; still present in the expected output
+  cleanup
+  unreachable
+}
+
+define internal void @hoge() { ; invoked from @zot; calls @spam twice, passing @eggs and then @barney as function pointers
+bb:
+  %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs)
+  %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney)
+  unreachable ; never returns to its caller
+}
+
+define internal fastcc i8* @spam(i1 (i8*)* %arg) { ; takes a function pointer but never uses it
+bb:
+  unreachable ; the whole body: no value is ever returned
+}
+
+define internal i1 @eggs(i8* %arg) { ; forwards %arg to @barney and never returns; the -passes= run expects an empty parameter list and no call
+; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs()
+; ALL_NEWPM-NEXT:  bb:
+; ALL_NEWPM-NEXT:    unreachable
+;
+bb:
+  %tmp = call zeroext i1 @barney(i8* %arg) ; result is unused
+  unreachable
+}
+
+define internal i1 @barney(i8* %arg) { ; ignores %arg
+bb:
+  ret i1 undef ; returns an undefined i1
+}
+
+define i32 @test_inf_promote_caller(i32 %arg) { ; allocates two %S nodes and passes their addresses to the recursive callee; %arg is unused
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller
+; ARGPROMOTION-SAME: (i32 [[ARG:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]]
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = alloca [[S]]
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = alloca %S
+  %tmp1 = alloca %S
+  %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1) ; expected to keep passing both %S* pointers unchanged
+
+  ret i32 0
+}
+
+define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { ; self-recursive walk over two %S chains; expected to keep its pointer signature. NOTE(review): the name suggests a guard against unbounded promotion - confirm
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee
+; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = load %S*, %S** [[TMP]]
+; ARGPROMOTION-NEXT:    [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP4:%.*]] = load %S*, %S** [[TMP3]]
+; ARGPROMOTION-NEXT:    [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0
+  %tmp2 = load %S*, %S** %tmp ; pointer stored in field 0 of %arg1
+  %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0
+  %tmp4 = load %S*, %S** %tmp3 ; pointer stored in field 0 of %arg
+  %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) ; recurse: each argument is replaced by the pointer loaded from its own field 0
+
+  ret i32 0
+}
+
+declare i32 @wibble(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+declare void @sink(i32)
+
+define internal void @test(i32** %X) !dbg !2 { ; dereferences %X twice and passes the i32 to @sink; expected to receive that i32 directly
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3
+; CHECK-NEXT:    call void @sink(i32 [[X_VAL_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %1 = load i32*, i32** %X, align 8 ; first level: the inner pointer
+  %2 = load i32, i32* %1, align 8 ; second level: the i32 value
+  call void @sink(i32 %2)
+  ret void
+}
+
+%struct.pair = type { i32, i32 }
+
+define internal void @test_byval(%struct.pair* byval %P) { ; never touches %P; it is still expected to be split into two i32 parameters stored to a local copy
+; CHECK-LABEL: define {{[^@]+}}@test_byval
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller(i32** %Y, %struct.pair* %P) { ; calls both promoted callees; the loads/GEPs inserted here are expected to carry the same !dbg as the call they feed
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]])
+; CHECK-NEXT:    [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4
+; CHECK-NEXT:    [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4
+; CHECK-NEXT:    call void @test(i32 [[Y_VAL_VAL]]), !dbg !4
+; CHECK-NEXT:    [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5
+; CHECK-NEXT:    [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5
+; CHECK-NEXT:    [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5
+; CHECK-NEXT:    call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @test(i32** %Y), !dbg !1
+
+  call void @test_byval(%struct.pair* %P), !dbg !6 ; NOTE(review): no 'byval' at this call site although @test_byval declares its parameter byval - confirm this is intended
+  ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!3}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 8, scope: !2)
+!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5)
+!5 = !DIFile(filename: "test.c", directory: "")
+!6 = !DILocation(line: 9, scope: !2)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.u = type { x86_fp80 }
+%struct.s = type { double, i16, i8, [5 x i8] }
+
+@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
+
+%struct.Foo = type { i32, i64 }
+@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
+
+define void @run() { ; driver that calls all four internal callees in this file; only the UseLongDoubleSafely call is expected to change
+; CHECK-LABEL: define {{[^@]+}}@run()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0
+; CHECK-NEXT:    [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) ; expected to stay a byval pointer call
+  tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) ; expected: the x86_fp80 member is loaded here and passed by value
+  call i64 @AccessPaddingOfStruct(%struct.Foo* @a) ; expected to keep passing the pointer
+  call i64 @CaptureAStruct(%struct.Foo* @a) ; expected to keep passing the pointer
+  ret void
+}
+
+define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { ; reads the union through a %struct.s view; expected to be left unchanged
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely
+; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s*
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2
+; CHECK-NEXT:    [[RESULT:%.*]] = load i8, i8* [[GEP]]
+; CHECK-NEXT:    ret i8 [[RESULT]]
+;
+entry:
+  %bitcast = bitcast %union.u* %arg to %struct.s* ; reinterpret the x86_fp80 union as { double, i16, i8, [5 x i8] }
+  %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 ; the i8 member of %struct.s
+  %result = load i8, i8* %gep
+  ret i8 %result
+}
+
+define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { ; reads only the x86_fp80 member; expected to take that x86_fp80 by value
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
+; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]])
+; CHECK-NEXT:    [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
+; CHECK-NEXT:    store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]]
+; CHECK-NEXT:    ret x86_fp80 [[FP80]]
+;
+  %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 ; the sole member of %union.u
+  %fp80 = load x86_fp80, x86_fp80* %gep
+  ret x86_fp80 %fp80
+}
+
+define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { ; loads the first 8 bytes of a { i32, i64 } as one i64; expected to be left unchanged
+; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64*
+; CHECK-NEXT:    [[V:%.*]] = load i64, i64* [[P]]
+; CHECK-NEXT:    ret i64 [[V]]
+;
+  %p = bitcast %struct.Foo* %a to i64*
+  %v = load i64, i64* %p ; presumably covers the i32 member plus the padding after it - confirm against the datalayout
+  ret i64 %v
+}
+
+define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { ; infinite loop that stores a pointer derived from %a to a stack slot; expected to be left unchanged
+; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]]
+; CHECK-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  %a_ptr = alloca %struct.Foo*
+  br label %loop
+
+loop:
+  %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ] ; null on entry, then the GEP of %a from the previous iteration
+  %0   = phi %struct.Foo* [ %a, %entry ],   [ %0, %loop ] ; %a used directly as a phi operand; the phi then feeds itself
+  store %struct.Foo* %phi, %struct.Foo** %a_ptr ; the pointer value itself is written to memory
+  %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0
+  br label %loop ; no exit: the function has no ret
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -globalopt -argpromotion -maxar=3 -sroa -S | FileCheck %s
+; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=3 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i32 }
+
+; Argpromote + sroa should change this to passing the two integers by value.
+define internal i32 @f(%struct.ss* inalloca  %s) {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
+  %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1
+  %a = load i32, i32* %f0, align 4
+  %b = load i32, i32* %f1, align 4
+  %r = add i32 %a, %b
+  ret i32 %r
+}
+
+define i32 @main() {
+; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i32 1, i32* %f0, align 4
+  store i32 2, i32* %f1, align 4
+  %r = call i32 @f(%struct.ss* inalloca %S)
+  ret i32 %r
+}
+
+; Argpromote can't promote %a because of the icmp use.
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind  {
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+entry:
+  %c = icmp eq %struct.ss* %a, %b
+  ret i1 %c
+}
+
+define i32 @test() {
+; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; Check that when argument promotion changes a function in some parent node of
+; the call graph, any analyses that happened to be cached for that function are
+; actually invalidated. We are using `demanded-bits` here because when printed
+; it will end up caching a value for every instruction, making it easy to
+; detect the instruction-level changes that will fail here. With improper
+; invalidation this will crash in the second printer as it tries to reuse
+; now-invalid demanded bits.
+;
+; RUN: opt < %s -passes='function(print<demanded-bits>),cgscc(argpromotion,function(print<demanded-bits>))' -maxar=3 -S | FileCheck %s
+
+@G = constant i32 0
+
+define internal i32 @a(i32* %x) {
+; CHECK-LABEL: define {{[^@]+}}@a
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %v = load i32, i32* %x
+  ret i32 %v
+}
+
+define i32 @b() {
+; CHECK-LABEL: define {{[^@]+}}@b()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @a(i32* @G)
+  ret i32 %v
+}
+
+define i32 @c() {
+; CHECK-LABEL: define {{[^@]+}}@c()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    [[V2:%.*]] = call i32 @b()
+; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+entry:
+  %v1 = call i32 @a(i32* @G)
+  %v2 = call i32 @b()
+  %result = add i32 %v1, %v2
+  ret i32 %result
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll
@@ -0,0 +1,387 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=3 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote
+define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; If the arguments are scalar, it's ok to promote.
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; If the arguments are scalar, it's ok to promote.
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
+attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #5 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; PR36543
+
+; Don't promote arguments of musttail callee
+
+%T = type { i32, i32, i32, i32 }
+
+define internal i32 @test(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  ret i32 %v
+}
+
+define i32 @caller(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = musttail call i32 @test(%T* [[P]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = musttail call i32 @test(%T* %p)
+  ret i32 %v
+}
+
+; Don't promote arguments of musttail caller
+
+define i32 @foo(%T* %p, i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
+  ret i32 0
+}
+
+define internal i32 @test2(%T* %p, i32 %p2) {
+; CHECK-LABEL: define {{[^@]+}}@test2
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]])
+; CHECK-NEXT:    ret i32 [[CA]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  %ca = musttail call i32 @foo(%T* undef, i32 %v)
+  ret i32 %ca
+}
+
+define i32 @caller2(%T* %g) {
+; CHECK-LABEL: define {{[^@]+}}@caller2
+; CHECK-SAME: (%T* [[G:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0)
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = call i32 @test2(%T* %g, i32 0)
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+
+; Don't promote parameters of/arguments to naked functions
+
+@g = common global i32 0, align 4
+
+define i32 @bar() {
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32* @g)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+entry:
+  %call = call i32 @foo(i32* @g)
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) #0 {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (i32* [[TMP0:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
+
+attributes #0 = { naked }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+
+; ArgumentPromotion should preserve the default function address space
+; from the data layout.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+
+@g = common global i32 0, align 4
+
+define i32 @bar() {
+; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call addrspace(1) i32 @foo()
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+
+entry:
+  %call = call i32 @foo(i32* @g)
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) {
+; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=3 < %s | FileCheck %s
+; RUN: opt -S -debugify -maxar=3 -o /dev/null < %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) {
+; CHECK-LABEL: define {{[^@]+}}@callee()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @thunk()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @thunk()
+  ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @thunk()
+; CHECK-NEXT:    to label [[OUT:%.*]] unwind label [[CPAD:%.*]]
+; CHECK:       out:
+; CHECK-NEXT:    ret void
+; CHECK:       cpad:
+; CHECK-NEXT:    [[PAD:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    call void @callee() [ "funclet"(token [[PAD]]) ]
+; CHECK-NEXT:    cleanupret from [[PAD]] unwind to caller
+;
+entry:
+  invoke void @thunk()
+  to label %out unwind label %cpad
+
+out:
+  ret void
+
+cpad:
+  %pad = cleanuppad within none []
+  call void @callee(i8* null) [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+}
+
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; PR 32917
+
+@b = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+
+define i32 @fn2() local_unnamed_addr {
+; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1
+; CHECK-NEXT:    [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4
+; CHECK-NEXT:    call fastcc void @fn1(i32 [[DOTIDX_VAL]])
+; CHECK-NEXT:    ret i32 undef
+;
+  %1 = load i32, i32* @b, align 4
+  %2 = sext i32 %1 to i64
+  %3 = inttoptr i64 %2 to i32*
+  call fastcc void @fn1(i32* %3)
+  ret i32 undef
+}
+
+define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr {
+; CHECK-LABEL: define {{[^@]+}}@fn1
+; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:    store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 -1
+  %3 = load i32, i32* %2, align 4
+  store i32 %3, i32* @a, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=3 -verify -dse -S %s -o - | FileCheck %s
+
+; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to
+; dbg.value which still used the removed argument.
+
+; The %p argument should be removed, and the use of it in dbg.value should be
+; changed to undef.
+
+%p_t = type i16*
+%fun_t = type void (%p_t)*
+
+define void @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca %fun_t
+  store %fun_t @bar, %fun_t* %tmp
+  ret void
+}
+
+define internal void @bar(%p_t %p)  {
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "bar", unit: !0)
+!4 = !DILocalVariable(name: "p", scope: !3)
+!5 = !DIExpression()
+!6 = !DILocation(line: 1, column: 1, scope: !3)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=3 -mem2reg -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Checks that !prof metadata is preserved correctly by argpromotion.
+
+define void @caller() #0 {
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:    call void @promote_i32_ptr(i32 42), !prof !0
+; CHECK-NEXT:    ret void
+;
+  %x = alloca i32
+  store i32 42, i32* %x
+  call void @promote_i32_ptr(i32* %x), !prof !0
+  ret void
+}
+
+define internal void @promote_i32_ptr(i32* %xp) {
+; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr
+; CHECK-SAME: (i32 [[XP_VAL:%.*]])
+; CHECK-NEXT:    call void @use_i32(i32 [[XP_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %x = load i32, i32* %xp
+  call void @use_i32(i32 %x)
+  ret void
+}
+
+declare void @use_i32(i32)
+
+!0 = !{!"branch_weights", i32 30}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+; PR17906
+; When we promote two arguments in a single function with different types,
+; before the fix, we used the same tag for the newly-created two loads.
+; This testing case makes sure that we correctly transfer the tbaa tags from the
+; original loads to the newly-created loads when promoting pointer arguments.
+
+@a = global i32* null, align 8
+@e = global i32** @a, align 8
+@g = global i32 0, align 4
+@c = global i64 0, align 8
+@d = global i8 0, align 1
+
+define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
+; CHECK-LABEL: define {{[^@]+}}@fn
+; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], i8* @d, align 1, !tbaa !0
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i64, i64* %p2, align 8, !tbaa !1
+  %conv = trunc i64 %0 to i32
+  %1 = load i32, i32* %p1, align 4, !tbaa !5
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, i8* @d, align 1, !tbaa !7
+  ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3
+; CHECK-NEXT:    store i32* @g, i32** [[TMP0]], align 8, !tbaa !3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 4, !tbaa !5
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5
+; CHECK-NEXT:    [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7
+; CHECK-NEXT:    call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %0 = load i32**, i32*** @e, align 8, !tbaa !8
+  store i32* @g, i32** %0, align 8, !tbaa !8
+  %1 = load i32*, i32** @a, align 8, !tbaa !8
+  store i32 1, i32* %1, align 4, !tbaa !5
+  call fastcc void @fn(i32* @g, i64* @c)
+
+  ret i32 0
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
+!7 = !{!3, !3, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !3, i64 0}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @add({i32, i32}* %this, i32* sret %r) {
+; CHECK-LABEL: define {{[^@]+}}@add
+; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]])
+; CHECK-NEXT:    [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]]
+; CHECK-NEXT:    store i32 [[AB]], i32* [[R]]
+; CHECK-NEXT:    ret void
+;
+  %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
+  %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %ab = add i32 %a, %b
+  store i32 %ab, i32* %r
+  ret void
+}
+
+define void @f() {
+; CHECK-LABEL: define {{[^@]+}}@f()
+; CHECK-NEXT:    [[R:%.*]] = alloca i32
+; CHECK-NEXT:    [[PAIR:%.*]] = alloca { i32, i32 }
+; CHECK-NEXT:    [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0
+; CHECK-NEXT:    [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]]
+; CHECK-NEXT:    [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1
+; CHECK-NEXT:    [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]]
+; CHECK-NEXT:    call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]])
+; CHECK-NEXT:    ret void
+;
+  %r = alloca i32
+  %pair = alloca {i32, i32}
+
+  call void @add({i32, i32}* %pair, i32* sret %r)
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -argpromotion -maxar=3 -S -o - | FileCheck %s
+; RUN: opt %s -passes=argpromotion -maxar=3 -S -o - | FileCheck %s
+; PR14710
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%pair = type { i32, i32 }
+
+declare i8* @foo(%pair*)
+
+define internal void @bar(%pair* byval %Data) {
+; CHECK-LABEL: define {{[^@]+}}@bar
+; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]])
+; CHECK-NEXT:    [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[DATA_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[DATA_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
+; CHECK-NEXT:    ret void
+;
+  tail call i8* @foo(%pair* %Data)
+  ret void
+}
+
+define void @zed(%pair* byval %Data) {
+; CHECK-LABEL: define {{[^@]+}}@zed
+; CHECK-SAME: (%pair* byval [[DATA:%.*]])
+; CHECK-NEXT:    [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4
+; CHECK-NEXT:    [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4
+; CHECK-NEXT:    call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar(%pair* byval %Data)
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; In PR41658, argpromotion put an inalloca in a position that per the
+; calling convention is passed in a register. This test verifies that
+; we don't do that anymore. It also verifies that the combination of
+; globalopt and argpromotion is able to optimize the call safely.
+;
+; RUN: opt -S -argpromotion -maxar=3 %s | FileCheck %s --check-prefix=ARGPROMOTION
+; RUN: opt -S -globalopt -argpromotion -maxar=3 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i8 }
+
+define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]])
+; ARGPROMOTION-NEXT:  entry:
+; ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:  entry:
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+entry:
+  %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
+  %argmem = alloca inalloca <{ %struct.a }>, align 4
+  %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
+  %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
+  call void @ext(<{ %struct.a }>* inalloca %argmem)
+  ret void
+}
+
+; This is here to ensure @internalfun is live.
+define void @exportedfun(%struct.a* %a) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]])
+; ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; ARGPROMOTION-NEXT:    call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+  %inalloca.save = tail call i8* @llvm.stacksave()
+  %argmem = alloca inalloca <{ %struct.a }>, align 4
+  call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  ret void
+}
+
+declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
+declare void @ext(<{ %struct.a }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s
+
+; Unused arguments from variadic functions cannot be eliminated as that changes
+; their classification according to the SysV amd64 ABI. Clang and other frontends
+; bake in the classification when they use things like byval, as in this test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.tt0 = type { i64, i64 }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+; CHECK-LABEL: define {{[^@]+}}@main
+; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+  ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) {
+; CHECK-LABEL: define {{[^@]+}}@callee_t0f
+; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+
+define internal i32 @deref(i32* %x) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@deref
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %tmp2 = load i32, i32* %x, align 4
+  ret i32 %tmp2
+}
+
+define i32 @f(i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+  ret i32 %tmp1
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; PR2498
+
+; This test tries to convince argpromotion to promote the load from %A + 2,
+; because there is a load of %A in the entry block
+define internal i32 @callee(i1 %C, i32* %A) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_0:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 [[A_0]]
+; CHECK:       F:
+; CHECK-NEXT:    [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[A_2]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  ; Unconditionally load the element at %A
+  %A.0 = load i32, i32* %A
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 %A.0
+
+F:
+  ; Load the element at offset two from %A. This should not be promoted!
+  %A.2 = getelementptr i32, i32* %A, i32 2
+  %R = load i32, i32* %A.2
+  ret i32 %R
+}
+
+define i32 @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %X = call i32 @callee(i1 false, i32* null)             ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -inline -argpromotion -maxar=2147483647 -disable-output
+
+define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind {
+entry:
+  unreachable
+}
+
+define void @encode(i32* %m, i32* %ts, i32* %new) nounwind {
+entry:
+  %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind		; <i32> [#uses=0]
+  unreachable
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -disable-output
+
+define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind {
+entry:
+  br i1 false, label %bb, label %bb5
+
+bb:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind		; <i32> [#uses=0]
+  unreachable
+
+bb5:		; preds = %entry
+  ret i32 0
+}
+
+define i32 @term_Sharing(i32* %Term) nounwind {
+entry:
+  br i1 false, label %bb.i, label %bb14
+
+bb.i:		; preds = %entry
+  %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind		; <i32> [#uses=0]
+  ret i32 1
+
+bb14:		; preds = %entry
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
+
+define internal i32 @test(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  ret i32 %v
+}
+
+define i32 @caller() {
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2
+; CHECK-NEXT:    [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]]
+; CHECK-NEXT:    [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3
+; CHECK-NEXT:    [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]]
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @test(%T* @G)
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=2147483647 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=2147483647 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <4 x i64> [[TMP]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @no_promote(<4 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@no_promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@promote_avx2
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <4 x i64>, <4 x i64>* %arg1
+  store <4 x i64> %tmp, <4 x i64>* %arg
+  ret void
+}
+
+define void @promote(<4 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@promote
+; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <4 x i64>, align 32
+  %tmp2 = alloca <4 x i64>, align 32
+  %tmp3 = bitcast <4 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
+  %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+  store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+%struct.ss = type { i32, i64 }
+
+; Don't drop 'byval' on %X here.
+define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
+
+  store i32 0, i32* %X
+  ret void
+}
+
+; Also make sure we don't drop the call zeroext attribute.
+define i32 @test(i32* %X) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %tmp4, align 4
+
+  call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0)
+
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -basicaa -argpromotion -maxar=2147483647 -mem2reg -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @test(i32* %X, i32* %Y) { ; expected below: both pointer arguments become i32 values and the loads vanish from the body
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X ; only use of %X
+  %B = load i32, i32* %Y ; only use of %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define internal i32 @caller(i32* %B) { ; expected below: %B is promoted as well, and the stored 1 is passed to @test as a constant
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32 [[B_VAL1:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @test(i32* %A, i32* %B) ; forwards its own pointer argument to @test
+  ret i32 %C
+}
+
+define i32 @callercaller() { ; expected below: the alloca is gone and the constant 2 is passed straight to @caller
+; CHECK-LABEL: define {{[^@]+}}@callercaller()
+; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 2)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %B = alloca i32
+  store i32 2, i32* %B
+  %X = call i32 @caller(i32* %B)
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; Arg promotion eliminates the struct argument.
+; FIXME: We should eliminate the i32* argument.
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  { ; expected below: %b is split into (i32, i64) and rebuilt in a local alloca; %X stays a byval pointer
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of the i32 field
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments the i32 field in place
+
+  store i32 0, i32* %X
+  ret void
+}
+
+define i32 @test(i32* %X) { ; expected below: both fields of %S are loaded before the call and passed by value; %X is still passed byval
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32* [[X:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 ; address of the i32 field
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 ; address of the i64 field
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval %S, i32* byval %X) ; call site under test
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(%struct.ss* byval  %b) nounwind  { ; expected below: %b is split into (i32, i64); the rebuilt local copy is an align 4 alloca
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of the i32 field
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments the i32 field in place
+  ret void
+}
+
+
+define internal void @g(%struct.ss* byval align 32 %b) nounwind { ; expected below: %b is split into (i32, i64); the rebuilt local copy keeps the align 32 from the byval argument
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; address of the i32 field
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4 ; increments the i32 field in place
+  ret void
+}
+
+
+define i32 @main() nounwind  { ; expected below: the fields of %S are loaded separately before each of the two calls
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
+; CHECK-NEXT:    [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4
+; CHECK-NEXT:    [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
+; CHECK-NEXT:    call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca %struct.ss
+  %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 ; address of the i32 field
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 ; address of the i64 field
+  store i64 2, i64* %tmp4, align 4
+  call void @f(%struct.ss* byval %S) nounwind
+  call void @g(%struct.ss* byval %S) nounwind ; same %S passed a second time
+  ret i32 0
+}
+
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+@G1 = constant i32 0
+@G2 = constant i32* @G1
+
+define internal i32 @test(i32** %x) { ; expected below: the i32** argument is promoted through both loads to a plain i32
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL_VAL]]
+;
+entry:
+  %y = load i32*, i32** %x ; first level of the chain
+  %z = load i32, i32* %y ; second level: load through the loaded pointer
+  ret i32 %z
+}
+
+define i32 @caller() { ; expected below: both loads (from @G2, then through the loaded pointer) are emitted here before the call
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G2_VAL:%.*]] = load i32*, i32** @G2
+; CHECK-NEXT:    [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %x = call i32 @test(i32** @G2)
+  ret i32 %x
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; Don't promote around control flow.
+define internal i32 @callee(i1 %C, i32* %P) { ; expected below: signature and body are unchanged
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  br i1 %C, label %T, label %F
+
+T:
+  ret i32 17
+
+F:
+  %X = load i32, i32* %P ; %P is only loaded on this branch
+  ret i32 %X
+}
+
+define i32 @foo() { ; expected below: the call is unchanged and still passes a null pointer
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 true, i32* null)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  %X = call i32 @callee(i1 true, i32* null) ; %C is true, so the callee takes the branch that never loads %P
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define internal i32 @callee(i1 %C, i32* %P) { ; expected below: %P is promoted to an i32 value even though its load sits in block F
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 17
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 [[P_VAL]]
+;
+  br i1 %C, label %T, label %F
+
+T:              ; preds = %0
+  ret i32 17
+
+F:              ; preds = %0
+  %X = load i32, i32* %P               ; <i32> [#uses=1]
+  ret i32 %X
+}
+
+define i32 @foo() { ; expected below: %A is loaded in this caller and the loaded value is passed to @callee
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 17, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %A = alloca i32         ; <i32*> [#uses=2]
+  store i32 17, i32* %A
+  %X = call i32 @callee( i1 false, i32* %A )              ; <i32> [#uses=1]
+  ret i32 %X
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S < %s -inline -argpromotion -maxar=2147483647 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM
+; RUN: opt -S < %s -passes=inline,argpromotion -maxar=2147483647 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM
+
+%S = type { %S* }
+
+; Inlining should nuke the invoke (and any inlined calls) here even with
+; argument promotion running along with it.
+define void @zot() personality i32 (...)* @wibble { ; expected below: block bb ends directly in unreachable, with no invoke left in it
+; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       hoge.exit:
+; ARGPROMOTION-NEXT:    br label [[BB1:%.*]]
+; ARGPROMOTION:       bb1:
+; ARGPROMOTION-NEXT:    unreachable
+; ARGPROMOTION:       bb2:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = landingpad { i8*, i32 }
+; ARGPROMOTION-NEXT:    cleanup
+; ARGPROMOTION-NEXT:    unreachable
+;
+bb:
+  invoke void @hoge()
+  to label %bb1 unwind label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp = landingpad { i8*, i32 }
+  cleanup
+  unreachable
+}
+
+define internal void @hoge() { ; invoked from @zot; no assertions are attached to this function
+bb:
+  %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) ; passes @eggs as a function pointer
+  %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) ; passes @barney as a function pointer
+  unreachable
+}
+
+define internal fastcc i8* @spam(i1 (i8*)* %arg) { ; body is only unreachable and %arg is never used; no assertions attached
+bb:
+  unreachable
+}
+
+define internal i1 @eggs(i8* %arg) { ; asserted only for the new-pass-manager run: the parameter list becomes empty and bb is just unreachable
+; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs()
+; ALL_NEWPM-NEXT:  bb:
+; ALL_NEWPM-NEXT:    unreachable
+;
+bb:
+  %tmp = call zeroext i1 @barney(i8* %arg)
+  unreachable
+}
+
+define internal i1 @barney(i8* %arg) { ; returns undef without using %arg; no assertions attached
+bb:
+  ret i1 undef
+}
+
+define i32 @test_inf_promote_caller(i32 %arg) { ; expected below: the call to the recursive callee still passes both %S* pointers
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller
+; ARGPROMOTION-SAME: (i32 [[ARG:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]]
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = alloca [[S]]
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = alloca %S
+  %tmp1 = alloca %S
+  %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1)
+
+  ret i32 0
+}
+
+define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { ; self-recursive on the pointers loaded from each argument; expected below: signature stays (%S*, %S*)
+; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee
+; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]])
+; ARGPROMOTION-NEXT:  bb:
+; ARGPROMOTION-NEXT:    [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP2:%.*]] = load %S*, %S** [[TMP]]
+; ARGPROMOTION-NEXT:    [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[TMP4:%.*]] = load %S*, %S** [[TMP3]]
+; ARGPROMOTION-NEXT:    [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]])
+; ARGPROMOTION-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0
+  %tmp2 = load %S*, %S** %tmp ; pointer stored in %arg1's only field
+  %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0
+  %tmp4 = load %S*, %S** %tmp3 ; pointer stored in %arg's only field
+  %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) ; recursive call on the loaded pointers
+
+  ret i32 0
+}
+
+declare i32 @wibble(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+declare void @sink(i32)
+
+define internal void @test(i32** %X) !dbg !2 { ; expected below: %X is promoted through both loads to an i32, and the function keeps a !dbg attachment
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3
+; CHECK-NEXT:    call void @sink(i32 [[X_VAL_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %1 = load i32*, i32** %X, align 8
+  %2 = load i32, i32* %1, align 8 ; load through the loaded pointer
+  call void @sink(i32 %2)
+  ret void
+}
+
+%struct.pair = type { i32, i32 }
+
+define internal void @test_byval(%struct.pair* byval %P) { ; expected below: %P is split into two i32 arguments that are stored into a local alloca
+; CHECK-LABEL: define {{[^@]+}}@test_byval
+; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller(i32** %Y, %struct.pair* %P) { ; expected below: the loads and GEPs inserted for each call carry the same !dbg location as that call
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]])
+; CHECK-NEXT:    [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4
+; CHECK-NEXT:    [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4
+; CHECK-NEXT:    call void @test(i32 [[Y_VAL_VAL]]), !dbg !4
+; CHECK-NEXT:    [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5
+; CHECK-NEXT:    [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5
+; CHECK-NEXT:    [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5
+; CHECK-NEXT:    call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @test(i32** %Y), !dbg !1
+
+  call void @test_byval(%struct.pair* %P), !dbg !6
+  ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!3}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 8, scope: !2)
+!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5)
+!5 = !DIFile(filename: "test.c", directory: "")
+!6 = !DILocation(line: 9, scope: !2)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.u = type { x86_fp80 }
+%struct.s = type { double, i16, i8, [5 x i8] }
+
+@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
+
+%struct.Foo = type { i32, i64 }
+@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
+
+define void @run() { ; expected below: only the @UseLongDoubleSafely call is rewritten (it passes a loaded x86_fp80); the other three calls are unchanged
+; CHECK-LABEL: define {{[^@]+}}@run()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0
+; CHECK-NEXT:    [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+  call i64 @CaptureAStruct(%struct.Foo* @a)
+  ret void
+}
+
+define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { ; expected below: left unchanged, %arg stays a byval pointer
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely
+; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s*
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2
+; CHECK-NEXT:    [[RESULT:%.*]] = load i8, i8* [[GEP]]
+; CHECK-NEXT:    ret i8 [[RESULT]]
+;
+entry:
+  %bitcast = bitcast %union.u* %arg to %struct.s* ; reinterprets the x86_fp80 union as %struct.s
+  %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 ; address of the i8 field of %struct.s
+  %result = load i8, i8* %gep
+  ret i8 %result
+}
+
+define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { ; expected below: the argument becomes an x86_fp80 value that is stored into a local %union.u alloca
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
+; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]])
+; CHECK-NEXT:    [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
+; CHECK-NEXT:    store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]]
+; CHECK-NEXT:    ret x86_fp80 [[FP80]]
+;
+  %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 ; address of the union's x86_fp80 member
+  %fp80 = load x86_fp80, x86_fp80* %gep
+  ret x86_fp80 %fp80
+}
+
+define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { ; expected below: left unchanged, %a stays a byval pointer
+; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64*
+; CHECK-NEXT:    [[V:%.*]] = load i64, i64* [[P]]
+; CHECK-NEXT:    ret i64 [[V]]
+;
+  %p = bitcast %struct.Foo* %a to i64* ; views the start of the { i32, i64 } struct as one i64
+  %v = load i64, i64* %p
+  ret i64 %v
+}
+
+define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { ; expected below: left unchanged; %a feeds phi and GEP uses inside a loop that never exits
+; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct
+; CHECK-SAME: (%struct.Foo* byval [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]]
+; CHECK-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  %a_ptr = alloca %struct.Foo*
+  br label %loop
+
+loop:
+  %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ]
+  %0   = phi %struct.Foo* [ %a, %entry ],   [ %0, %loop ]
+  store %struct.Foo* %phi, %struct.Foo** %a_ptr ; the pointer derived from %a is written to memory
+  %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0
+  br label %loop
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -globalopt -argpromotion -maxar=2147483647 -sroa -S | FileCheck %s
+; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=2147483647 -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i32 }
+
+; Argpromote + sroa should change this to passing the two integers by value.
+define internal i32 @f(%struct.ss* inalloca  %s) { ; expected below: becomes f(i32, i32) whose body just adds its two arguments
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 ; address of the first i32 field
+  %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 ; address of the second i32 field
+  %a = load i32, i32* %f0, align 4
+  %b = load i32, i32* %f1, align 4
+  %r = add i32 %a, %b
+  ret i32 %r
+}
+
+define i32 @main() { ; expected below: the inalloca alloca disappears and the constants 1 and 2 are passed in a fastcc call
+; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i32 1, i32* %f0, align 4
+  store i32 2, i32* %f1, align 4
+  %r = call i32 @f(%struct.ss* inalloca %S) ; call site under test
+  ret i32 %r
+}
+
+; Argpromote can't promote %a because of the icmp use.
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind  { ; expected below: both parameters stay pointers, but %b no longer carries inalloca
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+entry:
+  %c = icmp eq %struct.ss* %a, %b ; compares the pointers themselves; neither is loaded from
+  ret i1 %c
+}
+
+define i32 @test() { ; expected below: the call to @g becomes fastcc and its second argument is no longer marked inalloca
+; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %S = alloca inalloca %struct.ss
+  %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) ; same alloca passed as both arguments
+  ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; Check that when argument promotion changes a function in some parent node of
+; the call graph, any analyses that happened to be cached for that function are
+; actually invalidated. We are using `demanded-bits` here because when printed
+; it will end up caching a value for every instruction, making it easy to
+; detect the instruction-level changes that will fail here. With improper
+; invalidation this will crash in the second printer as it tries to reuse
+; now-invalid demanded bits.
+;
+; RUN: opt < %s -passes='function(print<demanded-bits>),cgscc(argpromotion,function(print<demanded-bits>))' -maxar=2147483647 -S | FileCheck %s
+
+@G = constant i32 0
+
+define internal i32 @a(i32* %x) { ; expected below: %x is promoted to an i32 value that is returned directly
+; CHECK-LABEL: define {{[^@]+}}@a
+; CHECK-SAME: (i32 [[X_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 [[X_VAL]]
+;
+entry:
+  %v = load i32, i32* %x ; only use of %x
+  ret i32 %v
+}
+
+define i32 @b() { ; expected below: @G is loaded here and the loaded value is passed to @a
+; CHECK-LABEL: define {{[^@]+}}@b()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @a(i32* @G)
+  ret i32 %v
+}
+
+define i32 @c() { ; expected below: its @a call gets a load of @G in front of it; the @b call is untouched
+; CHECK-LABEL: define {{[^@]+}}@c()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @G
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @a(i32 [[G_VAL]])
+; CHECK-NEXT:    [[V2:%.*]] = call i32 @b()
+; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+entry:
+  %v1 = call i32 @a(i32* @G)
+  %v2 = call i32 @b() ; calls both @a and @b, so @c sits above both in the call graph
+  %result = add i32 %v1, %v2
+  ret i32 %result
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll
@@ -0,0 +1,387 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=2147483647 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=2147483647 < %s | FileCheck %s
+; Test that we only promote arguments when the caller/callee have compatible
+; function attributes.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote
+define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should not promote
+define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
+; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
+; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; This should promote
+define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
+; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load <8 x i64>, <8 x i64>* %arg1
+  store <8 x i64> %tmp, <8 x i64>* %arg
+  ret void
+}
+
+define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
+; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = alloca <8 x i64>, align 32
+  %tmp2 = alloca <8 x i64>, align 32
+  %tmp3 = bitcast <8 x i64>* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
+  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
+  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
+  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
+  ret void
+}
+
+; If the arguments are scalar, it's OK to promote.
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; If the arguments are scalar, it's OK to promote.
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 {
+; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CHECK-SAME: (i32* [[B:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1, i32* [[A]]
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = alloca i32
+  store i32 1, i32* %A
+  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B)
+  ret i32 %C
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
+
+attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
+attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
+attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
+attributes #5 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; PR36543
+
+; Don't promote arguments of musttail callee
+
+%T = type { i32, i32, i32, i32 }
+
+define internal i32 @test(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  ret i32 %v
+}
+
+define i32 @caller(%T* %p) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (%T* [[P:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = musttail call i32 @test(%T* [[P]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = musttail call i32 @test(%T* %p)
+  ret i32 %v
+}
+
+; Don't promote arguments of musttail caller
+
+define i32 @foo(%T* %p, i32 %v) {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
+  ret i32 0
+}
+
+define internal i32 @test2(%T* %p, i32 %p2) {
+; CHECK-LABEL: define {{[^@]+}}@test2
+; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]])
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[A_GEP]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]])
+; CHECK-NEXT:    ret i32 [[CA]]
+;
+  %a.gep = getelementptr %T, %T* %p, i64 0, i32 3
+  %b.gep = getelementptr %T, %T* %p, i64 0, i32 2
+  %a = load i32, i32* %a.gep
+  %b = load i32, i32* %b.gep
+  %v = add i32 %a, %b
+  %ca = musttail call i32 @foo(%T* undef, i32 %v)
+  ret i32 %ca
+}
+
+define i32 @caller2(%T* %g) {
+; CHECK-LABEL: define {{[^@]+}}@caller2
+; CHECK-SAME: (%T* [[G:%.*]])
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0)
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = call i32 @test2(%T* %g, i32 0)
+  ret i32 %v
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; Don't promote parameters of/arguments to naked functions
+
+@g = common global i32 0, align 4
+
+define i32 @bar() {
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32* @g)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+entry:
+  %call = call i32 @foo(i32* @g)
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) #0 {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: (i32* [[TMP0:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
+
+attributes #0 = { naked }
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; ArgumentPromotion should preserve the default function address space
+; from the data layout.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+
+@g = common global i32 0, align 4
+
+define i32 @bar() {
+; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call addrspace(1) i32 @foo()
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+
+entry:
+  %call = call i32 @foo(i32* @g)
+  ret i32 %call
+}
+
+define internal i32 @foo(i32*) {
+; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %retval = alloca i32, align 4
+  call void asm sideeffect "ldr r0, [r0] \0Abx lr        \0A", ""()
+  unreachable
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -argpromotion -maxar=2147483647 < %s | FileCheck %s
+; RUN: opt -S -passes=argpromotion -maxar=2147483647 < %s | FileCheck %s
+; RUN: opt -S -debugify -maxar=2147483647 -o /dev/null < %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) {
+; CHECK-LABEL: define {{[^@]+}}@callee()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @thunk()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @thunk()
+  ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @thunk()
+; CHECK-NEXT:    to label [[OUT:%.*]] unwind label [[CPAD:%.*]]
+; CHECK:       out:
+; CHECK-NEXT:    ret void
+; CHECK:       cpad:
+; CHECK-NEXT:    [[PAD:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    call void @callee() [ "funclet"(token [[PAD]]) ]
+; CHECK-NEXT:    cleanupret from [[PAD]] unwind to caller
+;
+entry:
+  invoke void @thunk()
+  to label %out unwind label %cpad
+
+out:
+  ret void
+
+cpad:
+  %pad = cleanuppad within none []
+  call void @callee(i8* null) [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+}
+
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; PR 32917
+
+@b = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+
+define i32 @fn2() local_unnamed_addr {
+; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1
+; CHECK-NEXT:    [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4
+; CHECK-NEXT:    call fastcc void @fn1(i32 [[DOTIDX_VAL]])
+; CHECK-NEXT:    ret i32 undef
+;
+  %1 = load i32, i32* @b, align 4
+  %2 = sext i32 %1 to i64
+  %3 = inttoptr i64 %2 to i32*
+  call fastcc void @fn1(i32* %3)
+  ret i32 undef
+}
+
+define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr {
+; CHECK-LABEL: define {{[^@]+}}@fn1
+; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr
+; CHECK-NEXT:    store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 -1
+  %3 = load i32, i32* %2, align 4
+  store i32 %3, i32* @a, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=2147483647 -verify -dse -S %s -o - | FileCheck %s
+
+; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to
+; dbg.value which still used the removed argument.
+
+; The %p argument should be removed, and the use of it in dbg.value should be
+; changed to undef.
+
+%p_t = type i16*
+%fun_t = type void (%p_t)*
+
+define void @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo()
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca %fun_t
+  store %fun_t @bar, %fun_t* %tmp
+  ret void
+}
+
+define internal void @bar(%p_t %p)  {
+; CHECK-LABEL: define {{[^@]+}}@bar()
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "bar", unit: !0)
+!4 = !DILocalVariable(name: "p", scope: !3)
+!5 = !DIExpression()
+!6 = !DILocation(line: 1, column: 1, scope: !3)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -argpromotion -maxar=2147483647 -mem2reg -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Checks if !prof metadata is correct after argpromotion.
+
+define void @caller() #0 {
+; CHECK-LABEL: define {{[^@]+}}@caller()
+; CHECK-NEXT:    call void @promote_i32_ptr(i32 42), !prof !0
+; CHECK-NEXT:    ret void
+;
+  %x = alloca i32
+  store i32 42, i32* %x
+  call void @promote_i32_ptr(i32* %x), !prof !0
+  ret void
+}
+
+define internal void @promote_i32_ptr(i32* %xp) {
+; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr
+; CHECK-SAME: (i32 [[XP_VAL:%.*]])
+; CHECK-NEXT:    call void @use_i32(i32 [[XP_VAL]])
+; CHECK-NEXT:    ret void
+;
+  %x = load i32, i32* %xp
+  call void @use_i32(i32 %x)
+  ret void
+}
+
+declare void @use_i32(i32)
+
+!0 = !{!"branch_weights", i32 30}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; PR17906
+; Before the fix, promoting two pointer arguments of different types in a
+; single function gave both of the newly-created loads the same tbaa tag.
+; This test case makes sure that we correctly transfer the tbaa tags from the
+; original loads to the newly-created loads when promoting pointer arguments.
+
+@a = global i32* null, align 8
+@e = global i32** @a, align 8
+@g = global i32 0, align 4
+@c = global i64 0, align 8
+@d = global i8 0, align 1
+
+define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
+; CHECK-LABEL: define {{[^@]+}}@fn
+; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], i8* @d, align 1, !tbaa !0
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i64, i64* %p2, align 8, !tbaa !1
+  %conv = trunc i64 %0 to i32
+  %1 = load i32, i32* %p1, align 4, !tbaa !5
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, i8* @d, align 1, !tbaa !7
+  ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: define {{[^@]+}}@main()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3
+; CHECK-NEXT:    store i32* @g, i32** [[TMP0]], align 8, !tbaa !3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 4, !tbaa !5
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5
+; CHECK-NEXT:    [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7
+; CHECK-NEXT:    call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %0 = load i32**, i32*** @e, align 8, !tbaa !8
+  store i32* @g, i32** %0, align 8, !tbaa !8
+  %1 = load i32*, i32** @a, align 8, !tbaa !8
+  store i32 1, i32* %1, align 4, !tbaa !5
+  call fastcc void @fn(i32* @g, i64* @c)
+
+  ret i32 0
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
+!7 = !{!3, !3, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !3, i64 0}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @add({i32, i32}* %this, i32* sret %r) {
+; CHECK-LABEL: define {{[^@]+}}@add
+; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]])
+; CHECK-NEXT:    [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]]
+; CHECK-NEXT:    store i32 [[AB]], i32* [[R]]
+; CHECK-NEXT:    ret void
+;
+  %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
+  %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %ab = add i32 %a, %b
+  store i32 %ab, i32* %r
+  ret void
+}
+
+define void @f() {
+; CHECK-LABEL: define {{[^@]+}}@f()
+; CHECK-NEXT:    [[R:%.*]] = alloca i32
+; CHECK-NEXT:    [[PAIR:%.*]] = alloca { i32, i32 }
+; CHECK-NEXT:    [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0
+; CHECK-NEXT:    [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]]
+; CHECK-NEXT:    [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1
+; CHECK-NEXT:    [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]]
+; CHECK-NEXT:    call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]])
+; CHECK-NEXT:    ret void
+;
+  %r = alloca i32
+  %pair = alloca {i32, i32}
+
+  call void @add({i32, i32}* %pair, i32* sret %r)
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt %s -argpromotion -maxar=2147483647 -S -o - | FileCheck %s
+; RUN: opt %s -passes=argpromotion -maxar=2147483647 -S -o - | FileCheck %s
+; PR14710
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%pair = type { i32, i32 }
+
+declare i8* @foo(%pair*)
+
+define internal void @bar(%pair* byval %Data) {
+; CHECK-LABEL: define {{[^@]+}}@bar
+; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]])
+; CHECK-NEXT:    [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[DATA_0]], i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[DATA_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
+; CHECK-NEXT:    ret void
+;
+  tail call i8* @foo(%pair* %Data)
+  ret void
+}
+
+define void @zed(%pair* byval %Data) {
+; CHECK-LABEL: define {{[^@]+}}@zed
+; CHECK-SAME: (%pair* byval [[DATA:%.*]])
+; CHECK-NEXT:    [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0
+; CHECK-NEXT:    [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4
+; CHECK-NEXT:    [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4
+; CHECK-NEXT:    call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar(%pair* byval %Data)
+  ret void
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; In PR41658, argpromotion put an inalloca in a position that per the
+; calling convention is passed in a register. This test verifies that
+; we don't do that anymore. It also verifies that the combination of
+; globalopt and argpromotion is able to optimize the call safely.
+;
+; RUN: opt -S -argpromotion -maxar=2147483647 %s | FileCheck %s --check-prefix=ARGPROMOTION
+; RUN: opt -S -globalopt -argpromotion -maxar=2147483647 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i8 }
+
+define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]])
+; ARGPROMOTION-NEXT:  entry:
+; ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun
+; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:  entry:
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+entry:
+  %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
+  %argmem = alloca inalloca <{ %struct.a }>, align 4
+  %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
+  %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
+  call void @ext(<{ %struct.a }>* inalloca %argmem)
+  ret void
+}
+
+; This is here to ensure @internalfun is live.
+define void @exportedfun(%struct.a* %a) {
+; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]])
+; ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; ARGPROMOTION-NEXT:    call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]])
+; ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; ARGPROMOTION-NEXT:    ret void
+;
+; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun
+; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; GLOBALOPT_ARGPROMOTION-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
+; GLOBALOPT_ARGPROMOTION-NEXT:    call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; GLOBALOPT_ARGPROMOTION-NEXT:    ret void
+;
+  %inalloca.save = tail call i8* @llvm.stacksave()
+  %argmem = alloca inalloca <{ %struct.a }>, align 4
+  call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  ret void
+}
+
+declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
+declare void @ext(<{ %struct.a }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s
+; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s
+
+; Unused arguments from variadic functions cannot be eliminated as that changes
+; their classification according to the SysV amd64 ABI. Clang and other frontends
+; bake in the classification when they use things like byval, as in this test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.tt0 = type { i64, i64 }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+; CHECK-LABEL: define {{[^@]+}}@main
+; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+  ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) {
+; CHECK-LABEL: define {{[^@]+}}@callee_t0f
+; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...)
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ret void
+}