Index: clang/test/CodeGen/aapcs-align.cpp
===================================================================
--- clang/test/CodeGen/aapcs-align.cpp
+++ clang/test/CodeGen/aapcs-align.cpp
@@ -95,8 +95,8 @@
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define void @g4
-// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
-// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval align 8
 // CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
 // CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
 
Index: clang/test/CodeGenCXX/builtin-source-location.cpp
===================================================================
--- clang/test/CodeGenCXX/builtin-source-location.cpp
+++ clang/test/CodeGenCXX/builtin-source-location.cpp
@@ -104,7 +104,7 @@
 //
 // CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP_ONE:[^,]*]],
 // CHECK-CTOR-GLOBAL-SAME: i32 3400, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{[^%]*}}%[[TMP_ONE]])
+// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{.*}}%[[TMP_ONE]])
 #line 3400 "GlobalInitVal.cpp"
 TestInit GlobalInitVal;
 
@@ -119,7 +119,7 @@
 //
 // CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
 // CHECK-CTOR-LOCAL-SAME: i32 3500, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{[^%]*}}%[[TMP]])
+// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{.*}}%[[TMP]])
 #line 3500 "LocalInitVal.cpp"
   TestInit init_local;
   sink(init_local);
Index: clang/test/CodeGenCXX/wasm-args-returns.cpp
===================================================================
--- clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,12 +30,12 @@
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* byval nocapture readonly align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
 // CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
-// CHECK: call void @_Z3use10two_fields(%struct.two_fields* byval nonnull align 8 %[[tmp]])
+// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval align 8 %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval align 8)
Index: clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
===================================================================
--- clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -20,7 +20,7 @@
 struct struct_with_mdp_too_much {
   char *a; char *b; char *c; char *d; test_struct_mdp e;
 };
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval {{.*}} %a)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval
 void f_struct_with_mdp_too_much(struct_with_mdp_too_much a) {
   (void)a;
 }
Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
Index: clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
===================================================================
--- clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* byval nocapture
+// CHECK: struct.int_single* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* byval nocapture
+// CHECK: struct.int_pair* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* byval nocapture
+// CHECK: struct.test_struct* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -1014,7 +1014,7 @@
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
     target-specific.
-``byval``
+``byval`` or ``byval(<ty>)``
     This indicates that the pointer parameter should really be passed by
     value to the function. The attribute implies that a hidden copy of
     the pointee is made between the caller and the callee, so the callee
@@ -1026,6 +1026,9 @@
     ``byval`` parameters). This is not a valid attribute for return
     values.
 
+    The byval attribute also supports an optional type argument, which must be
+    the same as the pointee type of the pointer parameter it is applied to.
+
     The byval attribute also supports specifying an alignment with the
     align attribute. It indicates the alignment of the stack slot to
     form and the known alignment of the pointer specified to the call
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -188,6 +188,7 @@
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     uint16_t Alignment = 0;
+    Type *ByValType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
Index: llvm/include/llvm/IR/Argument.h
===================================================================
--- llvm/include/llvm/IR/Argument.h
+++ llvm/include/llvm/IR/Argument.h
@@ -78,6 +78,9 @@
   /// If this is a byval or inalloca argument, return its alignment.
   unsigned getParamAlignment() const;
 
+  /// If this is a byval argument, return its type.
+  Type *getParamByValType() const;
+
   /// Return true if this argument has the nest attribute.
   bool hasNestAttr() const;
 
Index: llvm/include/llvm/IR/Attributes.h
===================================================================
--- llvm/include/llvm/IR/Attributes.h
+++ llvm/include/llvm/IR/Attributes.h
@@ -90,6 +90,7 @@
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
+  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
 
   /// Return a uniquified Attribute object that has the specific
   /// alignment set.
@@ -102,6 +103,7 @@
   static Attribute getWithAllocSizeArgs(LLVMContext &Context,
                                         unsigned ElemSizeArg,
                                         const Optional<unsigned> &NumElemsArg);
+  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
 
   //===--------------------------------------------------------------------===//
   // Attribute Accessors
@@ -117,6 +119,9 @@
   /// attribute.
   bool isStringAttribute() const;
 
+  /// Return true if the attribute is a type attribute.
+  bool isTypeAttribute() const;
+
   /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
@@ -139,6 +144,10 @@
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
+  /// Return the attribute's value as a Type. This requires the attribute to be
+  /// a type attribute.
+  Type *getValueAsType() const;
+
   /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
@@ -279,6 +288,7 @@
   unsigned getStackAlignment() const;
   uint64_t getDereferenceableBytes() const;
   uint64_t getDereferenceableOrNullBytes() const;
+  Type *getByValType() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp = false) const;
 
@@ -598,6 +608,9 @@
   /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
+  /// Return the byval type for the specified function parameter.
+  Type *getParamByValType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
@@ -697,6 +710,7 @@
   uint64_t DerefBytes = 0;
   uint64_t DerefOrNullBytes = 0;
   uint64_t AllocSizeArgs = 0;
+  Type *ByValType = nullptr;
 
 public:
   AttrBuilder() = default;
@@ -772,6 +786,9 @@
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
+  /// Retrieve the byval type.
+  Type *getByValType() const { return ByValType; }
+
   /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -796,6 +813,9 @@
   AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg);
 
+  /// This turns a byval type into the form used internally in Attribute.
+  AttrBuilder &addByValAttr(Type *Ty);
+
   /// Add an allocsize attribute, using the representation returned by
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
Index: llvm/include/llvm/IR/CallSite.h
===================================================================
--- llvm/include/llvm/IR/CallSite.h
+++ llvm/include/llvm/IR/CallSite.h
@@ -415,6 +415,11 @@
     CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
+  }
+
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
Index: llvm/include/llvm/IR/Function.h
===================================================================
--- llvm/include/llvm/IR/Function.h
+++ llvm/include/llvm/IR/Function.h
@@ -431,6 +431,11 @@
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return AttributeSets.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
Index: llvm/include/llvm/IR/InstrTypes.h
===================================================================
--- llvm/include/llvm/IR/InstrTypes.h
+++ llvm/include/llvm/IR/InstrTypes.h
@@ -1551,6 +1551,11 @@
     return Attrs.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return Attrs.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
Index: llvm/lib/AsmParser/LLParser.h
===================================================================
--- llvm/lib/AsmParser/LLParser.h
+++ llvm/lib/AsmParser/LLParser.h
@@ -339,6 +339,7 @@
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
+    bool ParseByValWithOptionalType(Type *&Result);
 
     // Module Summary Index Parsing.
     bool SkipModuleSummaryEntry();
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -1578,7 +1578,13 @@
       B.addAlignmentAttr(Alignment);
       continue;
     }
-    case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
+    case lltok::kw_byval: {
+      Type *Ty;
+      if (ParseByValWithOptionalType(Ty))
+        return true;
+      B.addByValAttr(Ty);
+      continue;
+    }
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -2431,6 +2437,22 @@
   return false;
 }
 
+/// ParseByValWithOptionalType - \p Result stays null when no type is given.
+///   ::= byval
+///   ::= byval(<ty>)
+bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+  Result = nullptr;
+  if (!EatIfPresent(lltok::kw_byval))
+    return Error(Lex.getLoc(), "expected 'byval'");
+  if (!EatIfPresent(lltok::lparen))
+    return false;
+  if (ParseType(Result))
+    return true;
+  if (!EatIfPresent(lltok::rparen))
+    return Error(Lex.getLoc(), "expected ')'");
+  return false;
+}
+
 /// ParseOptionalOperandBundles
 ///    ::= /*empty*/
 ///    ::= '[' OperandBundle [, OperandBundle ]* ']'
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -638,6 +638,10 @@
     return getFnValueByID(ValNo, Ty);
   }
 
+  /// Upgrades old-style typeless byval attributes by adding the corresponding
+  /// argument's pointee type.
+  void propagateByValTypes(CallBase *CB);
+
   /// Converts alignment exponent (i.e. power of two (or zero)) to the
   /// corresponding alignment to use. If alignment is too large, returns
   /// a corresponding error code.
@@ -1492,6 +1496,12 @@
           if (Error Err = parseAttrKind(Record[++i], &Kind))
             return Err;
 
+          // Upgrade old-style byval attribute to one with a type, even if it's
+          // nullptr. We will have to insert the real type when we associate
+          // this AttributeList with a function.
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(nullptr);
+
           B.addAttribute(Kind);
         } else if (Record[i] == 1) { // Integer attribute
           Attribute::AttrKind Kind;
@@ -1507,9 +1517,7 @@
             B.addDereferenceableOrNullAttr(Record[++i]);
           else if (Kind == Attribute::AllocSize)
             B.addAllocSizeAttrFromRawRepr(Record[++i]);
-        } else {                     // String attribute
-          assert((Record[i] == 3 || Record[i] == 4) &&
-                 "Invalid attribute group entry");
+        } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
           bool HasValue = (Record[i++] == 4);
           SmallString<64> KindStr;
           SmallString<64> ValStr;
@@ -1527,6 +1535,15 @@
           }
 
           B.addAttribute(KindStr.str(), ValStr.str());
+        } else {
+          assert((Record[i] == 5 || Record[i] == 6) &&
+                 "Invalid attribute group entry");
+          bool HasType = Record[i] == 6;
+          Attribute::AttrKind Kind;
+          if (Error Err = parseAttrKind(Record[++i], &Kind))
+            return Err;
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
         }
       }
 
@@ -3021,6 +3038,17 @@
   Func->setLinkage(getDecodedLinkage(RawLinkage));
   Func->setAttributes(getAttributes(Record[4]));
 
+  // Upgrade any old-style byval without a type by propagating the argument's
+  // pointee type. There should be no opaque pointers where the byval type is
+  // implicit.
+  for (auto &Arg : Func->args()) {
+    if (Arg.hasByValAttr() && !Arg.getParamByValType()) {
+      Arg.removeAttr(Attribute::ByVal);
+      Arg.addAttr(Attribute::getWithByValType(
+          Context, Arg.getType()->getPointerElementType()));
+    }
+  }
+
   unsigned Alignment;
   if (Error Err = parseAlignmentValue(Record[5], Alignment))
     return Err;
@@ -3421,6 +3449,19 @@
   return Error::success();
 }
 
+void BitcodeReader::propagateByValTypes(CallBase *CB) {
+  for (unsigned i = 0; i < CB->getNumArgOperands(); ++i) {
+    // Query by argument number; getAttribute() expects an AttributeList index.
+    if (CB->paramHasAttr(i, Attribute::ByVal) && !CB->getParamByValType(i)) {
+      CB->removeParamAttr(i, Attribute::ByVal);
+      CB->addParamAttr(
+          i, Attribute::getWithByValType(
+                 Context,
+                 CB->getArgOperand(i)->getType()->getPointerElementType()));
+    }
+  }
+}
+
 /// Lazily parse the specified function body block.
 Error BitcodeReader::parseFunctionBody(Function *F) {
   if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
@@ -4236,6 +4277,8 @@
       cast<InvokeInst>(I)->setCallingConv(
           static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
       cast<InvokeInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
+
       break;
     }
     case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4711,6 +4754,7 @@
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
       if (FMF.any()) {
         if (!isa<FPMathOperator>(I))
           return error("Fast-math-flags specified for call without "
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -747,7 +747,7 @@
         Record.push_back(1);
         Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
         Record.push_back(Attr.getValueAsInt());
-      } else {
+      } else if (Attr.isStringAttribute()) {
         StringRef Kind = Attr.getKindAsString();
         StringRef Val = Attr.getValueAsString();
 
@@ -758,6 +758,13 @@
           Record.append(Val.begin(), Val.end());
           Record.push_back(0);
         }
+      } else {
+        assert(Attr.isTypeAttribute());
+        Type *Ty = Attr.getValueAsType();
+        Record.push_back(Ty ? 6 : 5);
+        Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+        if (Ty)
+          Record.push_back(VE.getTypeID(Attr.getValueAsType()));
       }
     }
 
@@ -4114,15 +4121,15 @@
   // Emit blockinfo, which defines the standard abbreviations etc.
   writeBlockInfo();
 
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
   // Emit information about attribute groups.
   writeAttributeGroupTable();
 
   // Emit information about parameter attributes.
   writeAttributeTable();
 
-  // Emit information describing all of the types in the module.
-  writeTypeTable();
-
   writeComdats();
 
   // Emit top-level description of module, including target triple, inline asm,
Index: llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -949,9 +949,11 @@
   incorporateFunctionMetadata(F);
 
   // Adding function arguments to the value table.
-  for (const auto &I : F.args())
+  for (const auto &I : F.args()) {
     EnumerateValue(&I);
-
+    if (I.hasAttribute(Attribute::ByVal) && I.getParamByValType())
+      EnumerateType(I.getParamByValType());
+  }
   FirstFuncConstantID = Values.size();
 
   // Add all function-level constants to the value table.
Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -87,7 +87,10 @@
 
   if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
-    Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+    Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1204,9 +1204,11 @@
     if (Arg.IsByVal || Arg.IsInAlloca) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
-      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
-      // For ByVal, alignment should come from FE. BE will guess if this info is
-      // not there, but there are cases it cannot get right.
+      unsigned FrameSize =
+          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+      // For ByVal, alignment should come from FE. BE will guess if this info
+      // is not there, but there are cases it cannot get right.
       unsigned FrameAlign = Arg.Alignment;
       if (!FrameAlign)
         FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9072,8 +9072,11 @@
       if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should come from FE.  BE will guess if this
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Args[i].ByValType ? Args[i].ByValType : ElementTy);
+        Flags.setByValSize(FrameSize);
+
+        // For ByVal, alignment should come from FE.  BE will guess if this
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Args[i].Alignment)
@@ -9570,9 +9573,14 @@
       if (Flags.isByVal() || Flags.isInAlloca()) {
         PointerType *Ty = cast<PointerType>(Arg.getType());
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should be passed from FE.  BE will guess if
-        // this info is not there but there are cases it cannot get right.
+
+        // For ByVal, size and alignment should be passed from FE.  BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Arg.getParamByValType() ? Arg.getParamByValType() : ElementTy);
+        Flags.setByValSize(FrameSize);
+
         unsigned FrameAlign;
         if (Arg.getParamAlignment())
           FrameAlign = Arg.getParamAlignment();
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -112,6 +112,7 @@
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlignment(ArgIdx);
+  ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
Index: llvm/lib/IR/AttributeImpl.h
===================================================================
--- llvm/lib/IR/AttributeImpl.h
+++ llvm/lib/IR/AttributeImpl.h
@@ -29,6 +29,7 @@
 namespace llvm {
 
 class LLVMContext;
+class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
@@ -41,7 +42,8 @@
   enum AttrEntryKind {
     EnumAttrEntry,
     IntAttrEntry,
-    StringAttrEntry
+    StringAttrEntry,
+    TypeAttrEntry,
   };
 
   AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -56,6 +58,7 @@
   bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
   bool isIntAttribute() const { return KindID == IntAttrEntry; }
   bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
 
   bool hasAttribute(Attribute::AttrKind A) const;
   bool hasAttribute(StringRef Kind) const;
@@ -66,16 +69,20 @@
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
 
+  Type *getValueAsType() const;
+
   /// Used when sorting the attributes.
   bool operator<(const AttributeImpl &AI) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     if (isEnumAttribute())
-      Profile(ID, getKindAsEnum(), 0);
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
     else if (isIntAttribute())
       Profile(ID, getKindAsEnum(), getValueAsInt());
-    else
+    else if (isStringAttribute())
       Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
   }
 
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -88,6 +95,12 @@
     ID.AddString(Kind);
     if (!Values.empty()) ID.AddString(Values);
   }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind);
+    ID.AddPointer(Ty);
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -145,6 +158,18 @@
   StringRef getStringValue() const { return Val; }
 };
 
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty;
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; }
+};
+
 //===----------------------------------------------------------------------===//
 /// \class
 /// This class represents a group of attributes that apply to one
@@ -189,6 +214,7 @@
   uint64_t getDereferenceableOrNullBytes() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
 
   using iterator = const Attribute *;
 
Index: llvm/lib/IR/Attributes.cpp
===================================================================
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -121,6 +121,27 @@
   return Attribute(PA);
 }
 
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+                         Type *Ty) {
+  LLVMContextImpl *pImpl = Context.pImpl;
+  FoldingSetNodeID ID;
+  ID.AddInteger(Kind);
+  ID.AddPointer(Ty);
+
+  void *InsertPoint;
+  AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+  if (!PA) {
+    // If we didn't find any existing attributes of the same shape then create a
+    // new one and insert it.
+    PA = new TypeAttributeImpl(Kind, Ty);
+    pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+  }
+
+  // Return the Attribute that we found or created.
+  return Attribute(PA);
+}
+
 Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
@@ -146,6 +167,10 @@
   return get(Context, DereferenceableOrNull, Bytes);
 }
 
+Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
+  return get(Context, ByVal, Ty);
+}
+
 Attribute
 Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg) {
@@ -170,9 +195,13 @@
   return pImpl && pImpl->isStringAttribute();
 }
 
+bool Attribute::isTypeAttribute() const {
+  return pImpl && pImpl->isTypeAttribute();
+}
+
 Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
-  assert((isEnumAttribute() || isIntAttribute()) &&
+  assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
   return pImpl->getKindAsEnum();
 }
@@ -198,6 +227,14 @@
   return pImpl->getValueAsString();
 }
 
+Type *Attribute::getValueAsType() const {
+  if (!pImpl) return nullptr; // An empty attribute carries no type.
+  assert(isTypeAttribute() &&
+         "Invalid attribute type to get the value as a type!");
+  return pImpl->getValueAsType();
+}
+
+
 bool Attribute::hasAttribute(AttrKind Kind) const {
   return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
 }
@@ -252,8 +289,6 @@
     return "argmemonly";
   if (hasAttribute(Attribute::Builtin))
     return "builtin";
-  if (hasAttribute(Attribute::ByVal))
-    return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
   if (hasAttribute(Attribute::SwiftError))
@@ -353,6 +388,19 @@
   if (hasAttribute(Attribute::ImmArg))
     return "immarg";
 
+  if (hasAttribute(Attribute::ByVal)) {
+    std::string Result;
+    Result += "byval";
+    if (Type *Ty = getValueAsType()) {
+      raw_string_ostream OS(Result); // All of "(<type>)" goes via the stream.
+      OS << '(';
+      Ty->print(OS, false, true);
+      OS << ')';
+      OS.flush();
+    }
+    return Result;
+  }
+
   // FIXME: These should be output like this:
   //
   //   align=4
@@ -451,6 +499,8 @@
 
 void StringAttributeImpl::anchor() {}
 
+void TypeAttributeImpl::anchor() {}
+
 bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
   if (isStringAttribute()) return false;
   return getKindAsEnum() == A;
@@ -462,7 +512,7 @@
 }
 
 Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
-  assert(isEnumAttribute() || isIntAttribute());
+  assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
   return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
 }
 
@@ -481,6 +531,11 @@
   return static_cast<const StringAttributeImpl *>(this)->getStringValue();
 }
 
+Type *AttributeImpl::getValueAsType() const {
+  assert(isTypeAttribute());
+  return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
+}
+
 bool AttributeImpl::operator<(const AttributeImpl &AI) const {
   // This sorts the attributes with Attribute::AttrKinds coming first (sorted
   // relative to their enum value) and then strings.
@@ -488,10 +543,23 @@
     if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
     if (AI.isIntAttribute()) return true;
     if (AI.isStringAttribute()) return true;
+    if (AI.isTypeAttribute()) return true;
+  }
+
+  if (isTypeAttribute()) {
+    if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) {
+      if (getKindAsEnum() == AI.getKindAsEnum())
+        llvm_unreachable("Comparison of types would be unstable");
+      return getKindAsEnum() < AI.getKindAsEnum();
+    }
+    if (AI.isIntAttribute()) return true;
+    if (AI.isStringAttribute()) return true;
   }
 
   if (isIntAttribute()) {
     if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) return false;
     if (AI.isIntAttribute()) {
       if (getKindAsEnum() == AI.getKindAsEnum())
         return getValueAsInt() < AI.getValueAsInt();
@@ -500,7 +568,9 @@
     if (AI.isStringAttribute()) return true;
   }
 
+  assert(isStringAttribute());
   if (AI.isEnumAttribute()) return false;
+  if (AI.isTypeAttribute()) return false;
   if (AI.isIntAttribute()) return false;
   if (getKindAsString() == AI.getKindAsString())
     return getValueAsString() < AI.getValueAsString();
@@ -608,6 +678,10 @@
   return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
 }
 
+Type *AttributeSet::getByValType() const {
+  return SetNode ? SetNode->getByValType() : nullptr;
+}
+
 std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
   return SetNode ? SetNode->getAllocSizeArgs()
                  : std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -691,6 +765,9 @@
 
     Attribute Attr;
     switch (Kind) {
+    case Attribute::ByVal:
+      Attr = Attribute::getWithByValType(C, B.getByValType());
+      break;
     case Attribute::Alignment:
       Attr = Attribute::getWithAlignment(C, B.getAlignment());
       break;
@@ -760,6 +837,13 @@
   return 0;
 }
 
+Type *AttributeSetNode::getByValType() const {
+  for (const auto I : *this)
+    if (I.hasAttribute(Attribute::ByVal))
+      return I.getValueAsType();
+  return nullptr; // No byval attribute in this set.
+}
+
 uint64_t AttributeSetNode::getDereferenceableBytes() const {
   for (const auto I : *this)
     if (I.hasAttribute(Attribute::Dereferenceable))
@@ -1258,6 +1342,11 @@
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
+Type *AttributeList::getParamByValType(unsigned Index) const {
+  return getAttributes(Index+FirstArgIndex).getByValType();
+}
+
+
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
@@ -1336,6 +1425,7 @@
   TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
+  ByValType = nullptr;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1360,6 +1450,8 @@
     Alignment = Attr.getAlignment();
   else if (Kind == Attribute::StackAlignment)
     StackAlignment = Attr.getStackAlignment();
+  else if (Kind == Attribute::ByVal)
+    ByValType = Attr.getValueAsType();
   else if (Kind == Attribute::Dereferenceable)
     DerefBytes = Attr.getDereferenceableBytes();
   else if (Kind == Attribute::DereferenceableOrNull)
@@ -1382,6 +1474,8 @@
     Alignment = 0;
   else if (Val == Attribute::StackAlignment)
     StackAlignment = 0;
+  else if (Val == Attribute::ByVal)
+    ByValType = nullptr;
   else if (Val == Attribute::Dereferenceable)
     DerefBytes = 0;
   else if (Val == Attribute::DereferenceableOrNull)
@@ -1464,6 +1558,12 @@
   return *this;
 }
 
+AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
+  Attrs[Attribute::ByVal] = true;
+  ByValType = Ty;
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   // FIXME: What if both have alignments, but they don't match?!
   if (!Alignment)
Index: llvm/lib/IR/Function.cpp
===================================================================
--- llvm/lib/IR/Function.cpp
+++ llvm/lib/IR/Function.cpp
@@ -113,6 +113,11 @@
   return getParent()->getParamAlignment(getArgNo());
 }
 
+Type *Argument::getParamByValType() const {
+  assert(getType()->isPointerTy() && "Only pointers have byval types");
+  return getParent()->getParamByValType(getArgNo());
+}
+
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -1629,6 +1629,11 @@
          "'noinline and alwaysinline' are incompatible!",
          V);
 
+  // byval(<ty>) must match the pointee type; skip non-pointer params here.
+  if (Attrs.getByValType() && isa<PointerType>(Ty))
+    Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
+           "Attribute 'byval' type does not match parameter!", V);
+
   AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
          "Wrong types for attribute: " +
Index: llvm/test/Assembler/byval-type-attr.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/byval-type-attr.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: define void @foo(i32* byval(i32) align 4)
+define void @foo(i32* byval(i32) align 4) {
+  ret void
+}
+
+; CHECK: define void @bar({ i32*, i8 }* byval({ i32*, i8 }) align 4)
+define void @bar({i32*, i8}* byval({i32*, i8}) align 4) {
+  ret void
+}
+
+define void @caller({ i32*, i8 }* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+; CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+  call void @bar({i32*, i8}* byval %ptr)
+  invoke void @bar({i32*, i8}* byval %ptr) to label %success unwind label %fail
+
+success:
+  ret void
+
+fail:
+  landingpad { i8*, i32 } cleanup
+  ret void
+}
+
+; CHECK: declare void @baz([8 x i8]* byval([8 x i8]))
+%named_type = type [8 x i8]
+declare void @baz(%named_type* byval(%named_type))
+
+declare i32 @__gxx_personality_v0(...)
Index: llvm/test/Assembler/invalid-byval-type1.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/invalid-byval-type1.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attribute 'byval' type does not match parameter!
+declare void @foo(i32* byval(i8))
Index: llvm/test/Assembler/invalid-byval-type2.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/invalid-byval-type2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: void type only allowed for function results
+declare void @foo(i32* byval(void))
Index: llvm/test/Assembler/invalid-byval-type3.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/invalid-byval-type3.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attributes 'byval' and 'inalloca' do not support unsized types!
+declare void @foo(void()* byval(void()))
Index: llvm/test/Bitcode/attributes-3.3.ll
===================================================================
--- llvm/test/Bitcode/attributes-3.3.ll
+++ llvm/test/Bitcode/attributes-3.3.ll
@@ -48,7 +48,7 @@
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
Index: llvm/test/Bitcode/attributes.ll
===================================================================
--- llvm/test/Bitcode/attributes.ll
+++ llvm/test/Bitcode/attributes.ll
@@ -45,7 +45,7 @@
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
Index: llvm/test/Bitcode/byval-upgrade.test
===================================================================
--- /dev/null
+++ llvm/test/Bitcode/byval-upgrade.test
@@ -0,0 +1,7 @@
+RUN: llvm-dis %p/Inputs/byval-upgrade.bc -o - | FileCheck %s
+
+Make sure we upgrade old-style IntAttribute byval records to a fully typed
+version correctly.
+
+CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
Index: llvm/test/Bitcode/compatibility-3.6.ll
===================================================================
--- llvm/test/Bitcode/compatibility-3.6.ll
+++ llvm/test/Bitcode/compatibility-3.6.ll
@@ -404,7 +404,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-3.7.ll
===================================================================
--- llvm/test/Bitcode/compatibility-3.7.ll
+++ llvm/test/Bitcode/compatibility-3.7.ll
@@ -410,7 +410,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-3.8.ll
===================================================================
--- llvm/test/Bitcode/compatibility-3.8.ll
+++ llvm/test/Bitcode/compatibility-3.8.ll
@@ -435,7 +435,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-3.9.ll
===================================================================
--- llvm/test/Bitcode/compatibility-3.9.ll
+++ llvm/test/Bitcode/compatibility-3.9.ll
@@ -504,7 +504,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-4.0.ll
===================================================================
--- llvm/test/Bitcode/compatibility-4.0.ll
+++ llvm/test/Bitcode/compatibility-4.0.ll
@@ -504,7 +504,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-5.0.ll
===================================================================
--- llvm/test/Bitcode/compatibility-5.0.ll
+++ llvm/test/Bitcode/compatibility-5.0.ll
@@ -508,7 +508,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility-6.0.ll
===================================================================
--- llvm/test/Bitcode/compatibility-6.0.ll
+++ llvm/test/Bitcode/compatibility-6.0.ll
@@ -515,7 +515,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
Index: llvm/test/Bitcode/compatibility.ll
===================================================================
--- llvm/test/Bitcode/compatibility.ll
+++ llvm/test/Bitcode/compatibility.ll
@@ -517,7 +517,7 @@
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
@@ -1713,6 +1713,15 @@
 declare void @llvm.test.immarg.intrinsic(i32 immarg)
 ; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
 
+; byval attribute with type
+%named_type = type [8 x i8]
+declare void @byval_type(i32* byval(i32) align 2)
+declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+declare void @byval_named_type(%named_type* byval(%named_type))
+; CHECK: declare void @byval_type(i32* byval(i32) align 2)
+; CHECK: declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+; CHECK: declare void @byval_named_type([8 x i8]* byval([8 x i8]))
+
 ; CHECK: attributes #0 = { alignstack=4 }
 ; CHECK: attributes #1 = { alignstack=8 }
 ; CHECK: attributes #2 = { alwaysinline }
Index: llvm/test/Bitcode/highLevelStructure.3.2.ll
===================================================================
--- llvm/test/Bitcode/highLevelStructure.3.2.ll
+++ llvm/test/Bitcode/highLevelStructure.3.2.ll
@@ -41,7 +41,7 @@
 declare void @ParamAttr4(i8 signext)
 ; CHECK: declare void @ParamAttr5(i8* inreg)
 declare void @ParamAttr5(i8* inreg)
-; CHECK: declare void @ParamAttr6(i8* byval)
+; CHECK: declare void @ParamAttr6(i8* byval(i8))
 declare void @ParamAttr6(i8* byval)
 ; CHECK: declare void @ParamAttr7(i8* noalias)
 declare void @ParamAttr7(i8* noalias)
@@ -51,7 +51,7 @@
 declare void @ParamAttr9(i8* nest noalias nocapture)
 ; CHECK: declare void @ParamAttr10{{[(i8* sret noalias nocapture) | (i8* noalias nocapture sret)]}}
 declare void @ParamAttr10(i8* sret noalias nocapture)
-;CHECK: declare void @ParamAttr11{{[(i8* byval noalias nocapture) | (i8* noalias nocapture byval)]}}
+;CHECK: declare void @ParamAttr11{{[(i8* byval(i8) noalias nocapture) | (i8* noalias nocapture byval(i8))]}}
 declare void @ParamAttr11(i8* byval noalias nocapture)
 ;CHECK: declare void @ParamAttr12{{[(i8* inreg noalias nocapture) | (i8* noalias nocapture inreg)]}}
 declare void @ParamAttr12(i8* inreg noalias nocapture)
Index: llvm/test/CodeGen/AArch64/byval-type.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/byval-type.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+
+define i8 @byval_match(i8* byval(i8) align 1, i8* byval %ptr) {
+; CHECK-LABEL: byval_match:
+; CHECK: ldrb w0, [sp, #8]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_match(i8* %p0, i8* %p1) {
+; CHECK-LABEL: caller_match:
+; CHECK: ldrb [[P1:w[0-9]+]], [x1]
+; CHECK: strb [[P1]], [sp, #8]
+; CHECK: ldrb [[P0:w[0-9]+]], [x0]
+; CHECK: strb [[P0]], [sp]
+; CHECK: bl byval_match
+  call i8 @byval_match(i8* byval(i8) align 1 %p0, i8* byval %p1)
+  ret void
+}
+
+define i8 @byval_large([3 x i64]* byval([3 x i64]) align 8, i8* byval %ptr) {
+; CHECK-LABEL: byval_large:
+; CHECK: ldrb w0, [sp, #24]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_large([3 x i64]* %p0, i8* %p1) {
+; CHECK-LABEL: caller_large:
+; CHECK: ldr [[P0HI:x[0-9]+]], [x0, #16]
+; CHECK: ldr [[P0LO:q[0-9]+]], [x0]
+; CHECK: str [[P0HI]], [sp, #16]
+; CHECK: str [[P0LO]], [sp]
+; CHECK: bl byval_large
+  call i8 @byval_large([3 x i64]* byval([3 x i64]) align 8 %p0, i8* byval %p1)
+  ret void
+}
Index: llvm/test/Transforms/Inline/byval-tail-call.ll
===================================================================
--- llvm/test/Transforms/Inline/byval-tail-call.ll
+++ llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -56,7 +56,7 @@
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   tail call void @bar2(i32* byval %x)
   ret void
@@ -67,7 +67,7 @@
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   %x = alloca i32
   tail call void @bar2(i32* byval %x)
Index: llvm/unittests/IR/AttributesTest.cpp
===================================================================
--- llvm/unittests/IR/AttributesTest.cpp
+++ llvm/unittests/IR/AttributesTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
@@ -40,6 +41,10 @@
   EXPECT_TRUE(Align4 < Deref5);
   EXPECT_TRUE(Align5 < Deref4);
 
+  Attribute ByVal = Attribute::get(C, Attribute::ByVal, Type::getInt32Ty(C));
+  EXPECT_FALSE(ByVal < Attribute::get(C, Attribute::ZExt));
+  EXPECT_TRUE(ByVal < Align4);
+
   AttributeList ASs[] = {AttributeList::get(C, 2, Attribute::ZExt),
                          AttributeList::get(C, 1, Attribute::SExt)};
 
@@ -166,4 +171,19 @@
   EXPECT_EQ(2U, AL.getNumAttrSets());
 }
 
+TEST(Attributes, StringRepresentation) {
+  LLVMContext C;
+  StructType *Ty = StructType::create(Type::getInt32Ty(C), "mystruct");
+
+  // Insufficiently careful printing can result in byval(%mystruct = { i32 })
+  Attribute A = Attribute::getWithByValType(C, Ty);
+  EXPECT_EQ(A.getAsString(), "byval(%mystruct)");
+
+  A = Attribute::getWithByValType(C, nullptr);
+  EXPECT_EQ(A.getAsString(), "byval");
+
+  A = Attribute::getWithByValType(C, Type::getInt32Ty(C));
+  EXPECT_EQ(A.getAsString(), "byval(i32)");
+}
+
 } // end anonymous namespace