diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
--- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,9 +1,10 @@
 ; RUN: opt < %s -argpromotion -S | FileCheck %s
 
-; CHECK: define internal i32 @deref(i32 %x.val) #0 {
+; CHECK: define internal i32 @deref(i32 %[[X:[a-zA-Z._0-9]*]])
 define internal i32 @deref(i32* %x) nounwind {
 entry:
   %tmp2 = load i32, i32* %x, align 4
+; CHECK: ret i32 %[[X]]
   ret i32 %tmp2
 }
 
@@ -11,9 +12,9 @@
 entry:
   %x_addr = alloca i32
   store i32 %x, i32* %x_addr, align 4
-; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
+; CHECK: %[[XVal:[a-zA-Z._0-9]*]] = load i32, i32* %x_addr, align 4
+; CHECK: %tmp1 = call i32 @deref(i32 %[[XVal]])
   %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+; CHECK: ret i32 %tmp1
   ret i32 %tmp1
 }
-
-; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
--- a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
@@ -22,9 +22,10 @@
 }
 
 define i32 @foo() {
+        %A = alloca i32, i32 3
 ; CHECK-LABEL: define i32 @foo
-        %X = call i32 @callee(i1 false, i32* null)             ; <i32> [#uses=1]
-; CHECK: call i32 @callee(i1 false, i32* null)
+        %X = call i32 @callee(i1 false, i32* %A)             ; <i32> [#uses=1]
+; CHECK: call i32 @callee(i1 false, i32* %A)
         ret i32 %X
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
--- a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
@@ -18,14 +18,84 @@
 ; CHECK: ret i32
 }
 
+define internal i32 @bitcast1(%T* %p) {
+; FIXME: This should look like: define internal i32 @bitcast1(i32 %{{.*}})
+; CHECK-LABEL: define internal i32 @bitcast1(%T* %p)
+entry:
+; FIXME: The bitcast below is equivalent to the GEP
+;   %bc = getelementptr %T, %T* %p, i64 0, i32 0
+; but we currently fail to promote in the presence of bitcasts.
+  %bc = bitcast %T* %p to i32*
+  %v = load i32, i32* %bc
+; FIXME: This should be a CHECK-NOT!
+; CHECK: load
+  ret i32 %v
+; CHECK: ret i32
+}
+
+define internal i32 @bitcast2(%T* %p) {
+; FIXME: This should look like: define internal i32 @bitcast2(i32 %{{.*}})
+; CHECK-LABEL: define internal i32 @bitcast2(%T* %p)
+entry:
+; FIXME: The bitcast below is equivalent to the GEP
+;   %gp = getelementptr %T, %T* %p, i64 0, i32 2
+; but we currently fail to promote in the presence of bitcasts.
+  %bc = bitcast %T* %p to i32*
+  %gp = getelementptr i32, i32* %bc, i32 2
+  %v = load i32, i32* %gp
+; FIXME: This should be a CHECK-NOT!
+; CHECK: load
+  ret i32 %v
+; CHECK: ret i32
+}
+
+define internal i32 @bitcast3(%T* %p) {
+; FIXME: This should look like: define internal i32 @bitcast3(i32 %{{.*}})
+; CHECK-LABEL: define internal i32 @bitcast3(%T* %p)
+entry:
+; FIXME: The bitcast below is equivalent to the GEP
+;   %gp2 = getelementptr %T, %T* %p, i64 0, i32 3
+; but we currently fail to promote in the presence of bitcasts.
+  %gp1 = getelementptr %T, %T* %p, i64 0, i32 2
+  %bc1 = bitcast i32* %gp1 to i8*
+  %gp2 = getelementptr i8, i8* %bc1, i32 4
+  %bc2 = bitcast i8* %gp2 to i32*
+  %v = load i32, i32* %bc2
+; FIXME: This should be a CHECK-NOT!
+; CHECK: load
+  ret i32 %v
+; CHECK: ret i32
+}
+
 define i32 @caller() {
 ; CHECK-LABEL: define i32 @caller(
 entry:
-  %v = call i32 @test(%T* @G)
-; CHECK: %[[B_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 2
-; CHECK: %[[B:.*]] = load i32, i32* %[[B_GEP]]
-; CHECK: %[[A_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 3
-; CHECK: %[[A:.*]] = load i32, i32* %[[A_GEP]]
+  %v1 = call i32 @test(%T* @G)
+  %v2 = call i32 @bitcast1(%T* @G)
+  %v3 = call i32 @bitcast2(%T* @G)
+  %v4 = call i32 @bitcast3(%T* @G)
+; CHECK-DAG: %[[B_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 2
+; CHECK-DAG: %[[B:.*]] = load i32, i32* %[[B_GEP]]
+; CHECK-DAG: %[[A_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 3
+; CHECK-DAG: %[[A:.*]] = load i32, i32* %[[A_GEP]]
 ; CHECK: call i32 @test(i32 %[[B]], i32 %[[A]])
-  ret i32 %v
+; FIXME: This should look like:
+;   %[[BC1_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 0
+;   %[[BC1_V:.*]] = load i32, i32* %[[BC1_GEP]]
+;   call i32 @bitcast1(i32 %[[BC1_V]])
+; CHECK: call i32 @bitcast1(%T* @G)
+; FIXME: This should look like:
+;   %[[BC2_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 2
+;   %[[BC2_V:.*]] = load i32, i32* %[[BC2_GEP]]
+;   call i32 @bitcast2(i32 %[[BC2_V]])
+; CHECK: call i32 @bitcast2(%T* @G)
+; FIXME: This should look like:
+;   %[[BC3_GEP:.*]] = getelementptr %T, %T* @G, i64 0, i32 3
+;   %[[BC3_V:.*]] = load i32, i32* %[[BC3_GEP]]
+;   call i32 @bitcast3(i32 %[[BC3_V]])
+; CHECK: call i32 @bitcast3(%T* @G)
+  %add1 = add i32 %v1, %v2
+  %add2 = add i32 %v3, %v4
+  %mul = mul i32 %add1, %add2
+  ret i32 %mul
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
--- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
@@ -3,7 +3,8 @@
 
 %struct.ss = type { i32, i64 }
 
-; Don't drop 'byval' on %X here.
+; FIXME: We should also promote 'byval %X' here, as we already do for 'byval %b'.
+; PR42852
 define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ; CHECK-LABEL: define internal void @f(
 ; CHECK: i32 %[[B0:.*]], i64 %[[B1:.*]], i32* byval %X, i32 %i)
@@ -28,7 +29,7 @@
   ret void
 }
 
-; Also make sure we don't drop the call zeroext attribute.
+; Make sure we don't drop the call zeroext attribute.
 define i32 @test(i32* %X) {
 ; CHECK-LABEL: define i32 @test(
 entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
--- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -2,7 +2,8 @@
 ; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
 
 ; Arg promotion eliminates the struct argument.
-; FIXME: Should it eliminate the i32* argument?
+; FIXME: We should eliminate the i32* byval argument.
+; PR42852
 
 %struct.ss = type { i32, i64 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll
--- a/llvm/test/Transforms/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll
@@ -7,6 +7,10 @@
 
 define internal void @f(%struct.ss* byval  %b) nounwind  {
 entry:
+; CHECK: define internal void @f(i32 %[[B0:[a-zA-Z0-9._-]*]], i64 %[[B1:[a-zA-Z0-9._-]*]])
+; CHECK: alloca %struct.ss{{$}}
+; CHECK: store i32 %[[B0]]
+; CHECK: store i64 %[[B1]]
   %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
   %tmp1 = load i32, i32* %tmp, align 4
   %tmp2 = add i32 %tmp1, 1
@@ -14,13 +18,12 @@
   ret void
 }
 
-; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1)
-; CHECK: alloca %struct.ss{{$}}
-; CHECK: store i32 %b.0
-; CHECK: store i64 %b.1
-
 define internal void @g(%struct.ss* byval align 32 %b) nounwind {
 entry:
+; CHECK: define internal void @g(i32 %[[B0:[a-zA-Z0-9._-]*]], i64 %[[B1:[a-zA-Z0-9._-]*]])
+; CHECK: alloca %struct.ss, align 32
+; CHECK: store i32 %[[B0]]
+; CHECK: store i64 %[[B1]]
   %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
   %tmp1 = load i32, i32* %tmp, align 4
   %tmp2 = add i32 %tmp1, 1
@@ -28,23 +31,35 @@
   ret void
 }
 
-; CHECK-LABEL: define internal void @g(i32 %b.0, i64 %b.1)
-; CHECK: alloca %struct.ss, align 32
-; CHECK: store i32 %b.0
-; CHECK: store i64 %b.1
+define internal void @h([2 x i32]* byval %b) nounwind {
+entry:
+; Even if we do not access the first element we can promote this array.
+  %tmp = getelementptr [2 x i32], [2 x i32]* %b, i32 0, i32 1
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = add i32 %tmp1, 1
+  store i32 %tmp2, i32* %tmp, align 4
+  ret void
+}
 
 define i32 @main() nounwind  {
 entry:
+; CHECK-LABEL: define i32 @main
+  %A = alloca [2 x i32]
   %S = alloca %struct.ss
   %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
   store i32 1, i32* %tmp1, align 8
   %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
   store i64 2, i64* %tmp4, align 4
+
   call void @f(%struct.ss* byval %S) nounwind
+; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}})
+
   call void @g(%struct.ss* byval %S) nounwind
+; CHECK: call void @g(i32 %{{.*}}, i64 %{{.*}})
+
+; Verify we unpack the byval array.
+; FIXME: this should be: call void @h(i32 %{{[a-zA-Z._0-9]*}}, i32 %{{[a-zA-Z._0-9]*}})
+; CHECK: call void @h([2 x i32]* byval %A)
+  call void @h([2 x i32]* byval %A) nounwind
   ret i32 0
 }
-
-; CHECK-LABEL: define i32 @main
-; CHECK: call void @f(i32 %{{.*}}, i64 %{{.*}})
-; CHECK: call void @g(i32 %{{.*}}, i64 %{{.*}})
diff --git a/llvm/test/Transforms/ArgumentPromotion/crash.ll b/llvm/test/Transforms/ArgumentPromotion/crash.ll
--- a/llvm/test/Transforms/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/crash.ll
@@ -70,4 +70,39 @@
   ret i32 0
 }
 
+define i32 @test_inf2_promote_caller(i32 %arg) {
+; CHECK-LABEL: define i32 @test_inf2_promote_caller(
+bb:
+  %tmp = alloca %S
+  %tmp1 = alloca %S
+  %tmp2 = call i32 @test_inf2_promote_callee(%S* %tmp, %S* %tmp1)
+; CHECK: call i32 @test_inf2_promote_callee(%S* %{{.*}}, %S* %{{.*}})
+
+  ret i32 0
+}
+
+; Recursion but not self-recursion
+define internal i32 @test_inf2_promote_passthrough(%S* %arg, %S* %arg1) noinline {
+  %tmp0 = call i32 @test_inf2_promote_callee(%S* %arg, %S* %arg1)
+  ret i32 0
+}
+
+define internal i32 @test_inf2_promote_callee(%S* %arg, %S* %arg1) noinline {
+; CHECK-LABEL: define internal i32 @test_inf2_promote_callee(
+; CHECK: %S* %{{.*}}, %S* %{{.*}})
+bb:
+  %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0
+  %tmp2 = load %S*, %S** %tmp
+  %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0
+  %tmp4 = load %S*, %S** %tmp3
+; FIXME: If we replace the %tmp5 call with the line below, the test will loop
+;        indefinitely or crash as argument promotion will continue to promote
+;        the arguments. Only direct recursion is currently detected.
+; PR42683
+; %tmp5 = call i32 @test_inf2_promote_passthrough(%S* %tmp4, %S* %tmp2)
+  %tmp5 = call i32 @test_inf2_promote_callee(%S* %tmp4, %S* %tmp2)
+; CHECK: call i32 @test_inf2_promote_callee(%S* %{{.*}}, %S* %{{.*}})
+  ret i32 0
+}
+
 declare i32 @wibble(...)
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
--- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
@@ -13,13 +13,24 @@
 
 %struct.pair = type { i32, i32 }
 
-; CHECK: define internal void @test_byval(i32 %{{.*}}, i32 %{{.*}})
-define internal void @test_byval(%struct.pair* byval %P) {
+; CHECK: define internal i32 @test_byval(i32 %{{.*}}, i32 %{{.*}})
+define internal i32 @test_byval(%struct.pair* byval %P) {
+  %g = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
+  %v = load i32, i32* %g, align 8
+  ret i32 %v
+}
+
+; Make sure unused byval arguments are not promoted but removed
+;
+; FIXME: This should be: define internal void @test_byval_2()
+; Related to PR42852
+; CHECK: define internal void @test_byval_2(i32 %{{.*}}, i32 %{{.*}})
+define internal void @test_byval_2(%struct.pair* byval %P) {
   ret void
 }
 
 ; CHECK-LABEL: define {{.*}} @caller(
-define void @caller(i32** %Y, %struct.pair* %P) {
+define i32 @caller(i32** %Y, %struct.pair* %P) {
 ; CHECK:  load i32*, {{.*}} !dbg [[LOC_1:![0-9]+]]
 ; CHECK-NEXT:  load i32, {{.*}} !dbg [[LOC_1]]
 ; CHECK-NEXT: call void @test(i32 %{{.*}}), !dbg [[LOC_1]]
@@ -29,9 +40,12 @@
 ; CHECK-NEXT: load i32, i32* {{.*}} !dbg [[LOC_2]]
 ; CHECK-NEXT: getelementptr %struct.pair, {{.*}} !dbg [[LOC_2]]
 ; CHECK-NEXT: load i32, i32* {{.*}} !dbg [[LOC_2]]
-; CHECK-NEXT: call void @test_byval(i32 %{{.*}}, i32 %{{.*}}), !dbg [[LOC_2]]
-  call void @test_byval(%struct.pair* %P), !dbg !6
-  ret void
+; CHECK-NEXT: call i32 @test_byval(i32 %{{.*}}, i32 %{{.*}}), !dbg [[LOC_2]]
+  %v = call i32 @test_byval(%struct.pair* %P), !dbg !6
+; FIXME: This should be: call void @test_byval_2(), !dbg [[LOC_2:![0-9]+]]
+; CHECK: call void @test_byval_2(i32 %{{.*}}, i32 %{{.*}}), !dbg [[LOC_2:![0-9]+]]
+  call void @test_byval_2(%struct.pair* %P), !dbg !6
+  ret i32 %v
 }
 
 ; CHECK: [[SP]] = distinct !DISubprogram(name: "test",
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion.ll b/llvm/test/Transforms/ArgumentPromotion/recursion.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/recursion.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -gvn-hoist -argpromotion %s | FileCheck %s
+;
+; We should promote %x here as it is only loaded and passed into recursive calls.
+; This should work after -gvn-hoist (to move the loads into the entry block) and after
+; we derive dereferenceable based on the loads in all paths. The latter is
+; under construction right now (in the Attributor framework).
+;
+; PR887
+
+; FIXME: This should be:  define internal i32 @foo(i32 %{{.*}}, i32 %n, i32 %m)
+; CHECK: define internal i32 @foo(i32* %x, i32 %n, i32 %m)
+define internal i32 @foo(i32* %x, i32 %n, i32 %m) {
+entry:
+  %tmp = icmp ne i32 %n, 0
+  br i1 %tmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %tmp2 = load i32, i32* %x
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %tmp5 = load i32, i32* %x
+  %tmp7 = sub i32 %n, 1
+  %tmp9 = call i32 @foo(i32* %x, i32 %tmp7, i32 %tmp5)
+  %tmp11 = sub i32 %n, 2
+  %tmp14 = call i32 @foo(i32* %x, i32 %tmp11, i32 %m)
+  %tmp15 = add i32 %tmp9, %tmp14
+  br label %return
+
+return:                                           ; preds = %cond_false, %cond_true
+  %retval.0 = phi i32 [ %tmp2, %cond_true ], [ %tmp15, %cond_false ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(i32* %x, i32 %n, i32 %m) {
+entry:
+
+; FIXME: This should be:
+;   %[[XVal:[a-zA-Z._0-9]*]] = load i32, i32* %x
+;   %tmp3 = call i32 @foo(i32 %[[XVal]], i32 %n, i32 %m)
+; CHECK:  %tmp3 = call i32 @foo(i32* %x, i32 %n, i32 %m)
+  %tmp3 = call i32 @foo(i32* %x, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %tmp3
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/tail.ll b/llvm/test/Transforms/ArgumentPromotion/tail.ll
--- a/llvm/test/Transforms/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/tail.ll
@@ -1,6 +1,9 @@
 ; RUN: opt %s -argpromotion -S -o - | FileCheck %s
 ; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s
-; PR14710
+; PR14710, and related problems (baz, biz, buz) where 'tail' needs to be
+; removed if we introduce allocas.
+
+; FIXME: If the function is norecurse 'tail' removal should not be necessary.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -9,7 +12,7 @@
 declare i8* @foo(%pair*)
 
 define internal void @bar(%pair* byval %Data) {
-; CHECK: define internal void @bar(i32 %Data.0, i32 %Data.1)
+; CHECK: define internal void @bar(i32 %{{.*}}, i32 %{{.*}})
 ; CHECK: %Data = alloca %pair
 ; CHECK-NOT: tail
 ; CHECK: call i8* @foo(%pair* %Data)
@@ -17,7 +20,51 @@
   ret void
 }
 
+define internal void @baz(%pair* byval %Data) {
+; CHECK: define internal void @baz(i32 %{{.*}}, i32 %{{.*}})
+; CHECK: %Data = alloca %pair
+; CHECK: %Data2 = getelementptr %pair, %pair* %Data
+; FIXME: This is broken right now, it should be CHECK-NOT!
+; CHECK: tail
+; CHECK: call i8* @foo(%pair* %Data2)
+  %Data2 = getelementptr %pair, %pair* %Data
+  tail call i8* @foo(%pair* %Data2)
+  ret void
+}
+
+@a = global %pair* null, align 8
+declare void @unknown(%pair*)
+
+define internal void @biz(%pair* byval %Data) {
+; CHECK: define internal void @biz(i32 %{{.*}}, i32 %{{.*}})
+; CHECK: %Data = alloca %pair
+; CHECK: %Data2 = load %pair*, %pair** @a
+; FIXME: This is broken right now, it should be CHECK-NOT!
+; CHECK: tail
+; CHECK: call i8* @foo(%pair* %Data2)
+  call void @unknown(%pair* %Data)
+  %Data2 = load %pair*, %pair** @a
+  tail call i8* @foo(%pair* %Data2)
+  ret void
+}
+
+define internal void @buz(%pair* byval %Data) {
+; CHECK: define internal void @buz(i32 %{{.*}}, i32 %{{.*}})
+; CHECK: %Data = alloca %pair
+; CHECK: call i8* @foo(%pair* %Data)
+; FIXME: This is broken right now, it should be CHECK-NOT!
+; CHECK: tail
+; CHECK: call i8* @foo(%pair* %Data2)
+  %fr = call i8* @foo(%pair* %Data)
+  %Data2 = bitcast i8* %fr to %pair*
+  tail call i8* @foo(%pair* %Data2)
+  ret void
+}
+
 define void @zed(%pair* byval %Data) {
   call void @bar(%pair* byval %Data)
+  call void @baz(%pair* byval %Data)
+  call void @biz(%pair* byval %Data)
+  call void @buz(%pair* byval %Data)
   ret void
 }