Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -11045,13 +11045,15 @@
    - The call must immediately precede a :ref:`ret <i_ret>` instruction,
      or a pointer bitcast followed by a ret instruction.
    - The ret instruction must return the (possibly bitcasted) value
-     produced by the call or void.
-   - The caller and callee prototypes must match. Pointer types of
-     parameters or return types may differ in pointee type, but not
-     in address space.
+     produced by the call, undef, or void.
+   - The caller and callee prototypes must match if the calling convention is
+     not swifttailcc. Pointer types of parameters or return types may differ
+     in pointee type, but not in address space.
    - The calling conventions of the caller and callee must match.
    - All ABI-impacting function attributes, such as sret, byval, inreg,
-     returned, and inalloca, must match.
+     returned, and inalloca, must match. For swifttailcc calls, matching is not
+     required; instead, only a limited set of these attributes is allowed:
+     sret, byval, swiftself, and swiftasync.
    - The callee must be varargs iff the caller is varargs. Bitcasting a
      non-varargs function to the appropriate varargs type is legal so
      long as the non-varargs prefixes obey the other rules.
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -513,6 +513,7 @@
 
   void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal);
   void verifySwiftErrorValue(const Value *SwiftErrorVal);
+  void verifySwiftTailCCMustTailAttrs(AttrBuilder Attrs, StringRef Context);
   void verifyMustTailCall(CallInst &CI);
   bool verifyAttributeCount(AttributeList Attrs, unsigned Params);
   void verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
@@ -3251,6 +3252,19 @@
   visitInstruction(Call);
 }
 
+void Verifier::verifySwiftTailCCMustTailAttrs(AttrBuilder Attrs, StringRef Context) {
+  Assert(!Attrs.contains(Attribute::InAlloca),
+         Twine("inalloca attribute not allowed in swifttailcc ") + Context);
+  Assert(!Attrs.contains(Attribute::InReg),
+         Twine("inreg attribute not allowed in swifttailcc ") + Context);
+  Assert(!Attrs.contains(Attribute::SwiftError),
+         Twine("swifterror attribute not allowed in swifttailcc ") + Context);
+  Assert(!Attrs.contains(Attribute::Preallocated),
+         Twine("preallocated attribute not allowed in swifttailcc ") + Context);
+  Assert(!Attrs.contains(Attribute::ByRef),
+         Twine("byref attribute not allowed in swifttailcc ") + Context);
+}
+
 /// Two types are "congruent" if they are identical, or if they are both pointer
 /// types with different pointee types and the same address space.
 static bool isTypeCongruent(Type *L, Type *R) {
@@ -3285,22 +3299,9 @@
 void Verifier::verifyMustTailCall(CallInst &CI) {
   Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
 
-  // - The caller and callee prototypes must match.  Pointer types of
-  //   parameters or return types may differ in pointee type, but not
-  //   address space.
   Function *F = CI.getParent()->getParent();
   FunctionType *CallerTy = F->getFunctionType();
   FunctionType *CalleeTy = CI.getFunctionType();
-  if (!CI.getCalledFunction() || !CI.getCalledFunction()->isIntrinsic()) {
-    Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
-           "cannot guarantee tail call due to mismatched parameter counts",
-           &CI);
-    for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
-      Assert(
-          isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
-          "cannot guarantee tail call due to mismatched parameter types", &CI);
-    }
-  }
   Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(),
          "cannot guarantee tail call due to mismatched varargs", &CI);
   Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
@@ -3310,19 +3311,6 @@
   Assert(F->getCallingConv() == CI.getCallingConv(),
          "cannot guarantee tail call due to mismatched calling conv", &CI);
 
-  // - All ABI-impacting function attributes, such as sret, byval, inreg,
-  //   returned, preallocated, and inalloca, must match.
-  AttributeList CallerAttrs = F->getAttributes();
-  AttributeList CalleeAttrs = CI.getAttributes();
-  for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
-    AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
-    AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
-    Assert(CallerABIAttrs == CalleeABIAttrs,
-           "cannot guarantee tail call due to mismatched ABI impacting "
-           "function attributes",
-           &CI, CI.getOperand(I));
-  }
-
   // - The call must immediately precede a :ref:`ret <i_ret>` instruction,
   //   or a pointer bitcast followed by a ret instruction.
   // - The ret instruction must return the (possibly bitcasted) value
@@ -3342,8 +3330,53 @@
   ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
   Assert(Ret, "musttail call must precede a ret with an optional bitcast",
          &CI);
-  Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
+  Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal ||
+             isa<UndefValue>(Ret->getReturnValue()),
          "musttail call result must be returned", Ret);
+
+  AttributeList CallerAttrs = F->getAttributes();
+  AttributeList CalleeAttrs = CI.getAttributes();
+  if (CI.getCallingConv() == CallingConv::SwiftTail) {
+    // - Only sret, byval, swiftself, and swiftasync ABI-impacting attributes
+    //   are allowed in a swifttailcc call.
+    for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+      AttrBuilder ABIAttrs = getParameterABIAttributes(I, CallerAttrs);
+      verifySwiftTailCCMustTailAttrs(ABIAttrs, "musttail caller");
+    }
+    for (int I = 0, E = CalleeTy->getNumParams(); I != E; ++I) {
+      AttrBuilder ABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
+      verifySwiftTailCCMustTailAttrs(ABIAttrs, "musttail callee");
+    }
+    // - Varargs functions are not allowed
+    Assert(!CallerTy->isVarArg(),
+           "cannot guarantee swifttailcc tail call for varargs function");
+    return;
+  }
+
+  // - The caller and callee prototypes must match.  Pointer types of
+  //   parameters or return types may differ in pointee type, but not
+  //   address space.
+  if (!CI.getCalledFunction() || !CI.getCalledFunction()->isIntrinsic()) {
+    Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+           "cannot guarantee tail call due to mismatched parameter counts",
+           &CI);
+    for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+      Assert(
+          isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
+          "cannot guarantee tail call due to mismatched parameter types", &CI);
+    }
+  }
+
+  // - All ABI-impacting function attributes, such as sret, byval, inreg,
+  //   returned, preallocated, and inalloca, must match.
+  for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+    AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
+    AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
+    Assert(CallerABIAttrs == CalleeABIAttrs,
+           "cannot guarantee tail call due to mismatched ABI impacting "
+           "function attributes",
+           &CI, CI.getOperand(I));
+  }
 }
 
 void Verifier::visitCallInst(CallInst &CI) {
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5244,9 +5244,6 @@
     // Check if it's really possible to do a tail call.
     IsTailCall = isEligibleForTailCallOptimization(
         Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
-    if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
-      report_fatal_error("failed to perform tail call elimination on a call "
-                         "site marked musttail");
 
     // A sibling call is one where we're under the usual C ABI and not planning
     // to change that but can still do a tail call:
@@ -5258,6 +5255,10 @@
       ++NumTailCalls;
   }
 
+  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
+
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2304,9 +2304,6 @@
         Callee, CallConv, isVarArg, isStructRet,
         MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
         PreferIndirect);
-    if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
-      report_fatal_error("failed to perform tail call elimination on a call "
-                         "site marked musttail");
 
     if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
         CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
@@ -2318,6 +2315,9 @@
       ++NumTailCalls;
   }
 
+  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3998,7 +3998,9 @@
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
-  if (isTailCall && !IsSibcall && !IsMustTail) {
+  if (isTailCall &&
+      shouldGuaranteeTCO(CallConv,
+                         MF.getTarget().Options.GuaranteedTailCallOpt)) {
     // Lower arguments at fp - stackoffset + fpdiff.
     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
 
Index: llvm/test/CodeGen/AArch64/swifttail-call.ll
===================================================================
--- llvm/test/CodeGen/AArch64/swifttail-call.ll
+++ llvm/test/CodeGen/AArch64/swifttail-call.ll
@@ -10,7 +10,7 @@
 ; COMMON-LABEL: caller_to0_from0:
 ; COMMON-NEXT: // %bb.
 
-  tail call swifttailcc void @callee_stack0()
+  musttail call swifttailcc void @callee_stack0()
   ret void
 
 ; COMMON-NEXT: b callee_stack0
@@ -19,7 +19,7 @@
 define swifttailcc void @caller_to0_from8([8 x i64], i64) {
 ; COMMON-LABEL: caller_to0_from8:
 
-  tail call swifttailcc void @callee_stack0()
+  musttail call swifttailcc void @callee_stack0()
   ret void
 
 ; COMMON: add sp, sp, #16
@@ -31,7 +31,7 @@
 
 ; Key point is that the "42" should go #16 below incoming stack
 ; pointer (we didn't have arg space to reuse).
-  tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
+  musttail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
   ret void
 
 ; COMMON: str {{x[0-9]+}}, [sp, #-16]!
@@ -43,7 +43,7 @@
 ; COMMON-NOT: sub sp,
 
 ; Key point is that the "%a" should go where at SP on entry.
-  tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
+  musttail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
   ret void
 
 ; COMMON: str {{x[0-9]+}}, [sp]
@@ -57,7 +57,7 @@
 ; Important point is that the call reuses the "dead" argument space
 ; above %a on the stack. If it tries to go below incoming-SP then the
 ; callee will not deallocate the space, even in swifttailcc.
-  tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
+  musttail call swifttailcc void @callee_stack16([8 x i64] undef, i64 42, i64 2)
 
 ; COMMON: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
 ; COMMON-NEXT: b callee_stack16
@@ -70,7 +70,7 @@
 ; COMMON-NOT: sub sp,
 
 ; Key point is that the "%a" should go where at #16 above SP on entry.
-  tail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
+  musttail call swifttailcc void @callee_stack8([8 x i64] undef, i64 42)
   ret void
 
 ; COMMON: str {{x[0-9]+}}, [sp, #16]!
@@ -84,7 +84,7 @@
 
 ; Here we want to make sure that both loads happen before the stores:
 ; otherwise either %a or %b will be wrongly clobbered.
-  tail call swifttailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
+  musttail call swifttailcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a)
   ret void
 
 ; COMMON: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
Index: llvm/test/CodeGen/X86/tailcall-swifttailcc.ll
===================================================================
--- llvm/test/CodeGen/X86/tailcall-swifttailcc.ll
+++ llvm/test/CodeGen/X86/tailcall-swifttailcc.ll
@@ -8,7 +8,7 @@
 ; CHECK-NOT: addq
 ; CHECK: jmp tailcallee
 entry:
-  %tmp11 = tail call swifttailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+  %tmp11 = musttail call swifttailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
   ret i32 %tmp11
 }
 
@@ -26,7 +26,7 @@
 define dso_local swifttailcc i8* @alias_caller() nounwind {
 ; CHECK-LABEL: alias_caller:
 ; CHECK:    jmp noalias_callee # TAILCALL
-  %p = tail call swifttailcc noalias i8* @noalias_callee()
+  %p = musttail call swifttailcc noalias i8* @noalias_callee()
   ret i8* %p
 }
 
@@ -35,7 +35,7 @@
 define dso_local swifttailcc i32 @ret_undef() nounwind {
 ; CHECK-LABEL: ret_undef:
 ; CHECK:    jmp i32_callee # TAILCALL
-  %p = tail call swifttailcc i32 @i32_callee()
+  %p = musttail call swifttailcc i32 @i32_callee()
   ret i32 undef
 }
 
@@ -52,7 +52,7 @@
 ; CHECK-LABEL: void_test:
 ; CHECK:    jmp void_test
   entry:
-   tail call swifttailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+   musttail call swifttailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
    ret void
 }
 
@@ -60,6 +60,6 @@
 ; CHECK-LABEL: i1test:
 ; CHECK:    jmp i1test
   entry:
-  %4 = tail call swifttailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+  %4 = musttail call swifttailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
   ret i1 %4
 }
Index: llvm/test/Verifier/swifttailcc-musttail-valid.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/swifttailcc-musttail-valid.ll
@@ -0,0 +1,11 @@
+; RUN: opt -verify %s
+
+define swifttailcc void @valid_attrs(i64* sret(i64) %ret, i8* byval(i8) %byval, i8* swiftself %self, i8* swiftasync %ctx) {
+  musttail call swifttailcc void @valid_attrs(i64* sret(i64) %ret, i8* byval(i8) %byval, i8* swiftself %self, i8* swiftasync %ctx)
+  ret void
+}
+
+define swifttailcc void @mismatch_parms() {
+  musttail call swifttailcc void @valid_attrs(i64* sret(i64) undef, i8* byval(i8) undef, i8* swiftself undef, i8* swiftasync  undef)
+  ret void
+}
Index: llvm/test/Verifier/swifttailcc-musttail.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/swifttailcc-musttail.ll
@@ -0,0 +1,72 @@
+; RUN: not opt -verify %s 2>&1 | FileCheck %s
+
+declare swifttailcc void @simple()
+
+define swifttailcc void @inreg(i8* inreg) {
+; CHECK: inreg attribute not allowed in swifttailcc musttail caller
+  musttail call swifttailcc void @simple()
+  ret void
+}
+
+define swifttailcc void @inalloca(i8* inalloca) {
+; CHECK: inalloca attribute not allowed in swifttailcc musttail caller
+  musttail call swifttailcc void @simple()
+  ret void
+}
+
+define swifttailcc void @swifterror(i8** swifterror) {
+; CHECK: swifterror attribute not allowed in swifttailcc musttail caller
+  musttail call swifttailcc void @simple()
+  ret void
+}
+
+define swifttailcc void @preallocated(i8* preallocated(i8)) {
+; CHECK: preallocated attribute not allowed in swifttailcc musttail caller
+  musttail call swifttailcc void @simple()
+  ret void
+}
+
+define swifttailcc void @byref(i8* byref(i8)) {
+; CHECK: byref attribute not allowed in swifttailcc musttail caller
+  musttail call swifttailcc void @simple()
+  ret void
+}
+
+define swifttailcc void @call_inreg() {
+; CHECK: inreg attribute not allowed in swifttailcc musttail callee
+  musttail call swifttailcc void @inreg(i8* inreg undef)
+  ret void
+}
+
+define swifttailcc void @call_inalloca() {
+; CHECK: inalloca attribute not allowed in swifttailcc musttail callee
+  musttail call swifttailcc void @inalloca(i8* inalloca undef)
+  ret void
+}
+
+define swifttailcc void @call_swifterror() {
+; CHECK: swifterror attribute not allowed in swifttailcc musttail callee
+  %err = alloca swifterror i8*
+  musttail call swifttailcc void @swifterror(i8** swifterror %err)
+  ret void
+}
+
+define swifttailcc void @call_preallocated() {
+; CHECK: preallocated attribute not allowed in swifttailcc musttail callee
+  musttail call swifttailcc void @preallocated(i8* preallocated(i8) undef)
+  ret void
+}
+
+define swifttailcc void @call_byref() {
+; CHECK: byref attribute not allowed in swifttailcc musttail callee
+  musttail call swifttailcc void @byref(i8* byref(i8) undef)
+  ret void
+}
+
+
+declare swifttailcc void @varargs(...)
+define swifttailcc void @call_varargs(...) {
+; CHECK: cannot guarantee swifttailcc tail call for varargs function
+  musttail call swifttailcc void(...) @varargs(...)
+  ret void
+}