Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1142,6 +1142,68 @@
     does not alias any other memory visible within a function and that a
     ``swifterror`` alloca passed as an argument does not escape.
 
+``convergent``
+    In some parallel execution models, there exist operations that are executed
+    simultaneously for multiple threads, and one or more arguments of the
+    operation must have the same value across all simultaneously executing
+    threads. Such arguments are called ``convergent``.
+
+    While the ``convergent`` attribute on functions can be thought of as
+    "whether and when each call site of this function is reached must be
+    uniform across a (target-dependent) set of threads", the ``convergent``
+    attribute on function arguments can be thought of as "the value of this
+    argument at each execution of each call site of this function must be
+    uniform across a (target-dependent) set of threads".
+
+    For every function F, transformations must ensure that every pair of
+    executions of F that is "compatible" with respect to convergent function
+    arguments in the original function F is also "compatible" when executing
+    the transformed function F. "Compatibility" is defined as follows:
+
+    Two executions e1 and e2 of a function are compatible (with respect to
+    convergent function arguments) if, for every call site CS of a function
+    with a convergent argument, the sequences e1(CS) and e2(CS) of values
+    supplied to the convergent argument at CS during e1 and e2, respectively,
+    are such that e1(CS) is a subsequence of e2(CS) or vice versa.
+
+    As an example to illustrate this definition, consider the following
+    stylized code fragment:
+
+    .. code-block:: text
+
+      if (cond) {
+        Tmp1 = Foo(v [convergent])
+      } else {
+        Tmp2 = Foo(v [convergent])
+      }
+      Tmp3 = phi [Tmp1, Tmp2]
+
+    Without the restrictions imposed by the convergent attribute, this could be
+    simplified to:
+
+    .. code-block:: text
+
+      Tmp3 = Foo(v [convergent])
+
+    To see why this transformation is forbidden, consider the executions
+    ``e1 = { cond = true, v = V0 }`` and ``e2 = { cond = false, v = V1 }`` with
+    V0 different from V1. These executions are compatible in the original code
+    fragment: at the first call site of ``Foo``, the sequences are
+    ``e1(CS) = (V0)`` and ``e2(CS) = ()``, so ``e2(CS)`` is a subsequence of
+    ``e1(CS)``. The second call site is similar. However, the executions are
+    incompatible in the transformed code fragment: the sequences at the single
+    call site are ``e1(CS) = (V0)`` and ``e2(CS) = (V1)``, so neither is a
+    subsequence of the other.
+
+    Calling a function F that contains a call site CS with convergent function
+    arguments has defined behavior only if the only data dependencies of the
+    convergent function arguments of CS are convergent parameters of F. If this
+    condition is not satisfied, then the behavior in F itself is defined, but a
+    function G that calls F has undefined behavior. The purpose of this rule is
+    to allow transformations to be applied to G without having to transitively
+    look inside all functions called by G.
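+
+    For illustration, the attribute is written directly on the parameter in
+    textual IR, just like other parameter attributes. The declaration below is
+    a hypothetical example (``@example.wait`` is not an intrinsic defined by
+    LLVM); it only shows the syntax and the resulting obligation on callers:
+
+    .. code-block:: llvm
+
+      ; At every execution of every call site of @example.wait, the value
+      ; passed for the first parameter must be uniform across the relevant
+      ; (target-dependent) set of threads.
+      declare void @example.wait(i32 convergent)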
+
+
 .. _gc:
 
 Garbage Collector Strategy Names
Index: include/llvm/Analysis/CodeMetrics.h
===================================================================
--- include/llvm/Analysis/CodeMetrics.h
+++ include/llvm/Analysis/CodeMetrics.h
@@ -53,7 +53,8 @@
   /// one or more 'noduplicate' instructions.
   bool notDuplicatable = false;
 
-  /// \brief True if this function contains a call to a convergent function.
+  /// \brief True if this function contains a call to a convergent function or
+  /// a function with convergent arguments.
   bool convergent = false;
 
   /// \brief True if this function calls alloca (in the C sense).
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -1775,6 +1775,18 @@
     return AttributeList.hasAttrSomewhere(Attribute::ByVal);
   }
 
+  /// Determine if any call argument is convergent.
+  bool hasConvergentArgument() const {
+    if (AttributeList.hasAttrSomewhere(Attribute::Convergent))
+      return true;
+
+    if (const Function *F = getCalledFunction())
+      if (F->getAttributes().hasAttrSomewhere(Attribute::Convergent))
+        return true;
+
+    return false;
+  }
+
   /// Return the function called, or null if this is an
   /// indirect function invocation.
   ///
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -49,6 +49,12 @@
 // Throws - This intrinsic can throw.
 def Throws : IntrinsicProperty;
 
+// Convergent - The specified argument is convergent. This corresponds to the
+// IR function argument attribute of the same name.
+class Convergent<int argNo> : IntrinsicProperty {
+  int ArgNo = argNo;
+}
+
 // NoCapture - The specified argument pointer is not captured by the intrinsic.
 class NoCapture<int argNo> : IntrinsicProperty {
   int ArgNo = argNo;
Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -240,7 +240,7 @@
    llvm_i1_ty,        // slc(imm)
    llvm_i1_ty,        // lwe(imm)
    llvm_i1_ty],       // da(imm)
-  [IntrReadMem]>;
+  [IntrReadMem, Convergent<1>]>;
 
 def int_amdgcn_image_load : AMDGPUImageLoad;
 def int_amdgcn_image_load_mip : AMDGPUImageLoad;
@@ -256,7 +256,7 @@
    llvm_i1_ty,        // slc(imm)
    llvm_i1_ty,        // lwe(imm)
    llvm_i1_ty],       // da(imm)
-  []>;
+  [Convergent<2>]>;
 
 def int_amdgcn_image_store : AMDGPUImageStore;
 def int_amdgcn_image_store_mip : AMDGPUImageStore;
@@ -272,7 +272,7 @@
    llvm_i1_ty,        // slc(imm)
    llvm_i1_ty,        // lwe(imm)
    llvm_i1_ty],       // da(imm)
-  [IntrReadMem]>;
+  [IntrReadMem, Convergent<1>, Convergent<2>]>;
 
 // Basic sample
 def int_amdgcn_image_sample : AMDGPUImageSample;
@@ -364,7 +364,7 @@
    llvm_i1_ty,        // r128(imm)
    llvm_i1_ty,        // da(imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [Convergent<2>]>;
 
 def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
 def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
@@ -387,7 +387,7 @@
    llvm_i1_ty,        // r128(imm)
    llvm_i1_ty,        // da(imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [Convergent<3>]>;
 
 class AMDGPUBufferLoad : Intrinsic <
   [llvm_anyfloat_ty],
@@ -396,7 +396,7 @@
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrReadMem]>;
+  [IntrReadMem, Convergent<0>]>;
 
 def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
 def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@@ -408,7 +408,7 @@
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrWriteMem]>;
+  [IntrWriteMem, Convergent<1>]>;
 
 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
 def int_amdgcn_buffer_store : AMDGPUBufferStore;
@@ -419,7 +419,7 @@
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [Convergent<1>]>;
 def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
 def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
 def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
@@ -438,7 +438,7 @@
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [Convergent<2>]>;
 
 def int_amdgcn_buffer_wbinvl1_sc :
   GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Index: lib/Analysis/CodeMetrics.cpp
===================================================================
--- lib/Analysis/CodeMetrics.cpp
+++ lib/Analysis/CodeMetrics.cpp
@@ -168,7 +168,7 @@
     if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
       if (CI->cannotDuplicate())
         notDuplicatable = true;
-      if (CI->isConvergent())
+      if (CI->isConvergent() || CI->hasConvergentArgument())
        convergent = true;
     }
 
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -1387,6 +1387,7 @@
       continue;
     }
     case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
+    case lltok::kw_convergent:      B.addAttribute(Attribute::Convergent); break;
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -1324,7 +1324,6 @@
         I->getKindAsEnum() == Attribute::Cold ||
         I->getKindAsEnum() == Attribute::OptimizeNone ||
         I->getKindAsEnum() == Attribute::JumpTable ||
-        I->getKindAsEnum() == Attribute::Convergent ||
         I->getKindAsEnum() == Attribute::ArgMemOnly ||
         I->getKindAsEnum() == Attribute::NoRecurse ||
         I->getKindAsEnum() == Attribute::InaccessibleMemOnly ||
@@ -1335,7 +1334,8 @@
               "' only applies to functions!", V);
       return;
     }
-  } else if (I->getKindAsEnum() == Attribute::ReadOnly ||
+  } else if (I->getKindAsEnum() == Attribute::Convergent ||
+             I->getKindAsEnum() == Attribute::ReadOnly ||
              I->getKindAsEnum() == Attribute::WriteOnly ||
              I->getKindAsEnum() == Attribute::ReadNone) {
     if (Idx == 0) {
@@ -1362,6 +1362,7 @@
 
   if (isReturnValue)
     Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+               !Attrs.hasAttribute(Idx, Attribute::Convergent) &&
               !Attrs.hasAttribute(Idx, Attribute::Nest) &&
               !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
              !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
Index: lib/Transforms/Scalar/GVNHoist.cpp
===================================================================
--- lib/Transforms/Scalar/GVNHoist.cpp
+++ lib/Transforms/Scalar/GVNHoist.cpp
@@ -920,7 +920,7 @@
           break;
         }
 
-        if (Call->isConvergent())
+        if (Call->isConvergent() || Call->hasConvergentArgument())
          break;
 
        CI.insert(Call, VN);
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1007,6 +1007,13 @@
   // Assuming n is the same on all threads, any kind of unrolling is
   // safe. But currently llvm's notion of convergence isn't powerful
   // enough to express this.
+  //
+  // Similarly, if the loop contains an operation with a convergent argument,
+  // the prelude could shift the sequences of argument values in a way that
+  // affects the compatibility of runs wrt convergent function arguments.
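+  //
+  // As an illustrative sketch, assume an unroll factor of 4, a prelude that
+  // peels the first n % 4 iterations, and a convergent argument that is the
+  // canonical induction variable: runs with n = 5 and n = 6 supply the
+  // compatible sequences (0,1,2,3,4) and (0,1,2,3,4,5) at the single call
+  // site of the original loop, but at the first call site of the unrolled
+  // body they would supply (1) and (2), and neither is a subsequence of the
+  // other.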
+  //
+  // TODO: This is also conservative. If all convergent arguments that appear
+  // in the loop are loop invariant, then compatibility of runs is preserved.
   if (Convergent)
     UP.AllowRemainder = false;
 
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1371,7 +1371,6 @@
       return false;
 
     return true;
-
   case Instruction::ShuffleVector:
     // Shufflevector masks are constant.
     return OpIdx != 2;
@@ -1438,6 +1437,17 @@
     // Don't touch any operand of token type.
     return false;
 
+    // We cannot sink calls if any argument is convergent: sinking may
+    // introduce divergence even when the argument is the same IR value in all
+    // blocks, because divergence in that value may have been filtered out by
+    // the control-flow conditions guarding those blocks.
+    if (isa<CallInst>(I0) || isa<InvokeInst>(I0)) {
+      ImmutableCallSite CS(I0);
+      if (OI < CS.getNumArgOperands() &&
+          CS.paramHasAttr(OI + 1, Attribute::Convergent))
+        return false;
+    }
+
     // Because SROA can't handle speculating stores of selects, try not
     // to sink loads or stores of allocas when we'd have to create a PHI for
     // the address operand. Also, because it is likely that loads or stores
Index: test/Bitcode/attributes.ll
===================================================================
--- test/Bitcode/attributes.ll
+++ test/Bitcode/attributes.ll
@@ -334,6 +334,11 @@
   ret void
 }
 
+; CHECK: define void @f57(i32 convergent)
+define void @f57(i32 convergent) {
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
Index: test/Transforms/GVNHoist/hoist-convergent.ll
===================================================================
--- test/Transforms/GVNHoist/hoist-convergent.ll
+++ test/Transforms/GVNHoist/hoist-convergent.ll
@@ -86,8 +86,35 @@
   ret float %add
 }
 
+; Cannot hoist because there could be divergence in the argument that was
+; filtered out by the branch condition.
+;
+; CHECK-LABEL: @no_convergent_arg_hoisting(
+; CHECK: if.then:
+; CHECK: call float @convergent_arg_func(
+
+; CHECK: if.else:
+; CHECK: call float @convergent_arg_func(
+define float @no_convergent_arg_hoisting(i1 %cc, float %x) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %r1 = call float @convergent_arg_func(float %x)
+  br label %if.end
+
+if.else:
+  %r2 = call float @convergent_arg_func(float %x)
+  br label %if.end
+
+if.end:
+  %r = phi float [ %r1, %if.then ], [ %r2, %if.else ]
+  ret float %r
+}
+
 declare float @convergent_func(float, float) #0
 declare float @func(float, float) #1
+declare float @convergent_arg_func(float convergent) #1
 
 attributes #0 = { nounwind readnone convergent }
 attributes #1 = { nounwind readnone }
Index: test/Transforms/InstCombine/select-call.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/select-call.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @f(i32 convergent %a, i32 %b) nounwind readnone
+
+; Must keep both calls to f here, since they're not compatible wrt convergent
+; function arguments.
+define i32 @check_bad(i1 %cond, i32 %v) {
+; CHECK-LABEL: @check_bad(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R0:%.*]] = call i32 @f(i32 0, i32 %v)
+; CHECK-NEXT:    [[R1:%.*]] = call i32 @f(i32 1, i32 %v)
+; CHECK-NEXT:    [[OUT:%.*]] = select i1 %cond, i32 [[R0]], i32 [[R1]]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+entry:
+  %r0 = call i32 @f(i32 0, i32 %v)
+  %r1 = call i32 @f(i32 1, i32 %v)
+  %out = select i1 %cond, i32 %r0, i32 %r1
+  ret i32 %out
+}
+
+; TODO: In this case, select C, call, call -> call select would be beneficial
+; and correct.
+define i32 @check_good(i1 %cond, i32 %v) {
+; CHECK-LABEL: @check_good
+; NO-CHECK: %out.v = select i1 %cond, i32 0, i32 1
+; NO-CHECK: %out = call i32 @f(i32 %v, i32 %out.v)
+;
+entry:
+  %r0 = call i32 @f(i32 %v, i32 0)
+  %r1 = call i32 @f(i32 %v, i32 1)
+  %out = select i1 %cond, i32 %r0, i32 %r1
+  ret i32 %out
+}
+
Index: test/Transforms/LoopUnroll/convergent.ll
===================================================================
--- test/Transforms/LoopUnroll/convergent.ll
+++ test/Transforms/LoopUnroll/convergent.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
 
 declare void @f() convergent
+declare void @g(i32 convergent)
 
 ; Although this loop contains a convergent instruction, it should be
 ; fully unrolled.
@@ -25,6 +26,27 @@
   ret i32 0
 }
 
+; CHECK-LABEL: @full_unroll_arg(
+define i32 @full_unroll_arg() {
+entry:
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK-NOT: call void @g(
+  call void @g(i32 %x.0)
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
+
 ; This loop contains a convergent instruction, but it should be partially
 ; unrolled. The unroll count is the largest power of 2 that divides the
 ; multiple -- 4, in this case.
@@ -51,6 +73,28 @@
   ret i32 0
 }
 
+; CHECK-LABEL: @runtime_unroll_arg(
+define i32 @runtime_unroll_arg(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 12
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK-NOT: call void @g(
+  call void @g(i32 %x.0)
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
 ; This loop contains a convergent instruction, so its partial unroll
 ; count must divide its trip multiple. This overrides its unroll
 ; pragma -- we unroll exactly 8 times, even though 16 is requested.
@@ -80,4 +124,30 @@
   ret i32 0
 }
 
+; CHECK-LABEL: @pragma_unroll_arg
+define i32 @pragma_unroll_arg(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 24
+  br label %l3, !llvm.loop !0
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK: call void @g(
+; CHECK-NOT: call void @g(
+  call void @g(i32 %x.0)
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+  ret i32 0
+}
+
 !0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
Index: test/Transforms/SimplifyCFG/convergent.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/convergent.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare i32 @f(i32 convergent %a, i32 %b) nounwind readonly
+
+; Cannot sink because there would clearly be divergence in the first argument.
+;
+define i32 @check1(i1 %cond, i32 %a, i32 %b, i32 %v) {
+; CHECK-LABEL: @check1(
+; CHECK-NEXT:    br i1 %cond, label %if.then, label %if.else
+; CHECK:       if.then:
+; CHECK-NEXT:    [[RA:%.*]] = call i32 @f(i32 %a, i32 %v)
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.else:
+; CHECK-NEXT:    [[RB:%.*]] = call i32 @f(i32 %b, i32 %v)
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[OUT:%.*]] = phi i32 [ [[RA]], %if.then ], [ [[RB]], %if.else ]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  br i1 %cond, label %if.then, label %if.else
+
+if.then:
+  %ra = call i32 @f(i32 %a, i32 %v)
+  br label %if.end
+
+if.else:
+  %rb = call i32 @f(i32 %b, i32 %v)
+  br label %if.end
+
+if.end:
+  %out = phi i32 [ %ra, %if.then ], [ %rb, %if.else ]
+  ret i32 %out
+}
+
+; Cannot sink because there could be divergence in the first argument that was
+; filtered out by the branch condition.
+;
+define i32 @check2(i1 %cond, i32 %a, i32 %b, i32 %v) {
+; CHECK-LABEL: @check2(
+; CHECK-NEXT:    br i1 %cond, label %if.then, label %if.else
+; CHECK:       if.then:
+; CHECK-NEXT:    [[RA:%.*]] = call i32 @f(i32 %v, i32 %a)
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.else:
+; CHECK-NEXT:    [[RB:%.*]] = call i32 @f(i32 %v, i32 %b)
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[OUT:%.*]] = phi i32 [ [[RA]], %if.then ], [ [[RB]], %if.else ]
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  br i1 %cond, label %if.then, label %if.else
+
+if.then:
+  %ra = call i32 @f(i32 %v, i32 %a)
+  br label %if.end
+
+if.else:
+  %rb = call i32 @f(i32 %v, i32 %b)
+  br label %if.end
+
+if.end:
+  %out = phi i32 [ %ra, %if.then ], [ %rb, %if.else ]
+  ret i32 %out
+}
Index: utils/TableGen/CodeGenIntrinsics.h
===================================================================
--- utils/TableGen/CodeGenIntrinsics.h
+++ utils/TableGen/CodeGenIntrinsics.h
@@ -108,7 +108,7 @@
   /// True if the intrinsic is marked as convergent.
   bool isConvergent;
 
-  enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
+  enum ArgAttribute { Convergent, NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
   std::vector<std::pair<unsigned, ArgAttribute>> ArgumentAttributes;
 
   CodeGenIntrinsic(Record *R);
Index: utils/TableGen/CodeGenTarget.cpp
===================================================================
--- utils/TableGen/CodeGenTarget.cpp
+++ utils/TableGen/CodeGenTarget.cpp
@@ -604,7 +604,10 @@
       isConvergent = true;
     else if (Property->getName() == "IntrNoReturn")
       isNoReturn = true;
-    else if (Property->isSubClassOf("NoCapture")) {
+    else if (Property->isSubClassOf("Convergent")) {
+      unsigned ArgNo = Property->getValueAsInt("ArgNo");
+      ArgumentAttributes.push_back(std::make_pair(ArgNo, Convergent));
+    } else if (Property->isSubClassOf("NoCapture")) {
       unsigned ArgNo = Property->getValueAsInt("ArgNo");
       ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture));
     } else if (Property->isSubClassOf("Returned")) {
Index: utils/TableGen/IntrinsicEmitter.cpp
===================================================================
--- utils/TableGen/IntrinsicEmitter.cpp
+++ utils/TableGen/IntrinsicEmitter.cpp
@@ -560,6 +560,12 @@
 
     do {
       switch (intrinsic.ArgumentAttributes[ai].second) {
+      case CodeGenIntrinsic::Convergent:
+        if (addComma)
+          OS << ",";
+        OS << "Attribute::Convergent";
+        addComma = true;
+        break;
       case CodeGenIntrinsic::NoCapture:
         if (addComma)
           OS << ",";