Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -36030,8 +36030,8 @@
     case 'v':
     case 'Y':
     case 'l':
-      return C_RegisterClass;
     case 'k': // AVX512 masking registers.
+      return C_RegisterClass;
     case 'a':
     case 'b':
     case 'c':
@@ -36063,8 +36063,15 @@
       switch (Constraint[1]) {
       default:
         break;
-      case 'k':
+      case 'z':
+      case '0':
         return C_Register;
+      case 'i':
+      case 'm':
+      case 'k':
+      case 't':
+      case '2':
+        return C_RegisterClass;
       }
     }
   }
@@ -36112,15 +36119,42 @@
     if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
-  case 'Y':
-    // Other "Y" (e.g. "Yk") constraints should be implemented below.
-    if (constraint[1] == 'k') {
-      // Support for 'Yk' (similarly to the 'k' variant below).
-      weight = CW_SpecificReg;
+  case 'Y': {
+    unsigned Size = StringRef(constraint).size();
+    // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y'
+    char NextChar = Size == 2 ? constraint[1] : 'i';
+    if (Size > 2) break;
+    switch (NextChar) {
+    default:
+      return CW_Invalid;
+    // XMM0
+    case 'z':
+    case '0':
+      if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+        return CW_SpecificReg;
+      return CW_Invalid;
+    // Conditional OpMask regs (AVX512)
+    case 'k':
+      if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+        return CW_Register;
+      return CW_Invalid;
+    // Any MMX reg
+    case 'm':
+      if (type->isX86_MMXTy() && Subtarget.hasMMX())
+        return weight;
+      return CW_Invalid;
+    // Any SSE reg when ISA >= SSE2, same as 'Y'
+    case 'i':
+    case 't':
+    case '2':
+      if (!Subtarget.hasSSE2())
+        return CW_Invalid;
+      break;
     }
-    // Else fall through (handle "Y" constraint).
+    // Fall through (handle "Y" constraint).
     LLVM_FALLTHROUGH;
+  }
   case 'v':
     if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
       weight = CW_Register;
@@ -36132,7 +36166,8 @@
     break;
   case 'k':
     // Enable conditional vector operations using %k<#> registers.
-    weight = CW_SpecificReg;
+    if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+      weight = CW_Register;
     break;
   case 'I':
     if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
@@ -36534,6 +36569,17 @@
       switch (Constraint[1]) {
       default:
         break;
+      case 'i':
+      case 't':
+      case '2':
+        return getRegForInlineAsmConstraint(TRI, "Y", VT);
+      case 'm':
+        if (!Subtarget.hasMMX()) break;
+        return std::make_pair(0U, &X86::VR64RegClass);
+      case 'z':
+      case '0':
+        if (!Subtarget.hasSSE1()) break;
+        return std::make_pair(X86::XMM0, &X86::VR128RegClass);
       case 'k':
         // This register class doesn't allocate k0 for masked vector operation.
         if (Subtarget.hasAVX512()) { // Only supported in AVX512.
Index: test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
===================================================================
--- test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
+++ test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu skx < %s | FileCheck %s
+; This test complements the .c test under clang/test/CodeGen/. We check
+; if the inline asm constraints are respected in the generated code.
+
+; Function Attrs: nounwind
+define void @f_Ym(i64 %m.coerce) {
+; Any MMX register constraint
+; CHECK-LABEL: f_Ym:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: movq %mm{{[0-9]+}}, %mm1
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call x86_mmx asm sideeffect "movq $0, %mm1\0A\09", "=^Ym,~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yi(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled (GCC: when inter-unit moves are enabled)
+; CHECK-LABEL: f_Yi:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yi,^Yi,^Yi,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yt(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled
+; CHECK-LABEL: f_Yt:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yt,^Yt,^Yt,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y2(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled
+; CHECK-LABEL: f_Y2:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Y2,^Y2,^Y2,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yz(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register (GCC)
+; CHECK-LABEL: f_Yz:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Yz,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y0(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register
+; CHECK-LABEL: f_Y0:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Y0,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
  ret void
+}
+
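For context, the source-level usage these constraints correspond to might look like the hypothetical C sketch below. The file name, function names, and compile flags are illustrative only (not part of the patch); it assumes a front end, such as the clang counterpart test mentioned in the .ll file, that lowers GCC-style "Yi"/"Yz" operand constraints to the "=^Yi"/"=^Yz" IR constraint strings exercised above, and an AVX-capable target so vpaddq is encodable (e.g. clang -S -O2 -march=skylake-avx512 y-constraints.c).

    /* y-constraints.c - hypothetical illustration of GCC 'Y' constraint variants */
    #include <immintrin.h>

    __m128i add_any_sse(__m128i a, __m128i b) {
      __m128i r;
      /* "Yi": any SSE register (requires SSE2 and inter-unit moves enabled). */
      __asm__("vpaddq %2, %1, %0" : "=Yi"(r) : "Yi"(a), "Yi"(b));
      return r;
    }

    __m128i add_in_xmm0(__m128i a, __m128i b) {
      __m128i r;
      /* "Yz": the output is pinned to %xmm0, as the f_Yz/f_Y0 checks expect. */
      __asm__("vpaddq %2, %1, %0" : "=Yz"(r) : "Yi"(a), "Yi"(b));
      return r;
    }

Each asm statement should then appear between the "## InlineAsm Start"/"## InlineAsm End" markers with operands drawn from the register class the constraint requests, which is what the FileCheck patterns above verify.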