Index: lib/Target/AArch64/AArch64SchedA53.td
===================================================================
--- lib/Target/AArch64/AArch64SchedA53.td
+++ lib/Target/AArch64/AArch64SchedA53.td
@@ -230,11 +230,11 @@
 def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
 def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
 def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
-def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>;
 def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
 def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>;
 
 def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
 def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
Index: lib/Target/AArch64/AArch64SchedThunderX2T99.td
===================================================================
--- lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -423,15 +423,15 @@
 def : InstRW<[WriteI],
             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
-                       "ADC?(W|X)r(i|r|s|x)",   "ADCS?(W|X)r(i|r|s|x)",
+                       "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
-                       "SUBS?(W|X)r(i|r|s|x)",  "SBC?(W|X)r(i|r|s|x)",
-                       "SBCS?(W|X)r(i|r|s|x)",  "CCMN?(W|X)r(i|r|s|x)",
-                       "CCMP?(W|X)r(i|r|s|x)",  "CSEL?(W|X)r(i|r|s|x)",
-                       "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
-                       "CSNEG?(W|X)r(i|r|s|x)")>;
+                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
+                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
+                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
+                       "CSINC(W|X)r",           "CSINV(W|X)r",
+                       "CSNEG(W|X)r")>;
 
 def : InstRW<[WriteI], (instrs COPY)>;
 
@@ -445,15 +445,15 @@
 def : InstRW<[WriteISReg],
             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
-                       "ADC?(W|X)r(i|r|s|x)",   "ADCS?(W|X)r(i|r|s|x)",
+                       "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
-                       "SUBS?(W|X)r(i|r|s|x)",  "SBC?(W|X)r(i|r|s|x)",
-                       "SBCS?(W|X)r(i|r|s|x)",  "CCMN?(W|X)r(i|r|s|x)",
-                       "CCMP?(W|X)r(i|r|s|x)",  "CSEL?(W|X)r(i|r|s|x)",
-                       "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
-                       "CSNEG?(W|X)r(i|r|s|x)")>;
+                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
+                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
+                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
+                       "CSINC(W|X)r",           "CSINV(W|X)r",
+                       "CSNEG(W|X)r")>;
 
 def : WriteRes<WriteIEReg,   [THX2T99I012]> {
   let Latency = 1;
@@ -464,15 +464,15 @@
 def : InstRW<[WriteIEReg],
             (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
-                       "ADC?(W|X)r(i|r|s|x)",   "ADCS?(W|X)r(i|r|s|x)",
+                       "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
-                       "SUBS?(W|X)r(i|r|s|x)",  "SBC?(W|X)r(i|r|s|x)",
-                       "SBCS?(W|X)r(i|r|s|x)",  "CCMN?(W|X)r(i|r|s|x)",
-                       "CCMP?(W|X)r(i|r|s|x)",  "CSEL?(W|X)r(i|r|s|x)",
-                       "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
-                       "CSNEG?(W|X)r(i|r|s|x)")>;
+                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
+                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
+                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
+                       "CSINC(W|X)r",           "CSINV(W|X)r",
+                       "CSNEG(W|X)r")>;
 
 // Move immed
 def : WriteRes<WriteImm,     [THX2T99I012]> {
@@ -1147,7 +1147,7 @@
 def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>;
 def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>;
 def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
-def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>;
 
 // FP divide, D-form
 // FP square root, D-form
@@ -1155,7 +1155,7 @@
 def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>;
 def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>;
 def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
-def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>;
 
 // FP multiply
 // FP multiply accumulate
@@ -1261,7 +1261,7 @@
 
 // ASIMD logical (MOV, MVN, ORN, ORR)
 def : InstRW<[THX2T99Write_5Cyc_F01],
-            (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv",
+            (instregex "^ANDv", "^BICv", "^EORv", "^MOVIv", "^MVNIv",
                        "^ORRv", "^ORNv", "^NOTv")>;
 // ASIMD arith, reduce
 def : InstRW<[THX2T99Write_10Cyc_F01],
@@ -1513,7 +1513,7 @@
 def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
 
 // ASIMD move, integer immed
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
 
 // ASIMD move, FP immed
 def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
Index: lib/Target/ARM/ARMScheduleA9.td
===================================================================
--- lib/Target/ARM/ARMScheduleA9.td
+++ lib/Target/ARM/ARMScheduleA9.td
@@ -2537,8 +2537,7 @@
 def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
                                        "MOVCCsr")>;
 def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
-def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
-                                      "MOV_ga_dyn")>;
+def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm")>;
 def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
 def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
 
@@ -2551,12 +2550,12 @@
       "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
 def : InstRW< [A9WriteM, A9WriteMHi],
       (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
-      "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
+      "UMAAL", "SMLALv5", "UMLALv5", "SMLALBB", "SMLALBT", "SMLALTB",
       "SMLALTT")>;
 // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
 def : InstRW< [A9WriteM, A9WriteMHi],
       (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
-      "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
+      "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
 
 def : InstRW<[A9WriteM16, A9WriteM16Hi],
       (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
Index: lib/Target/ARM/ARMScheduleR52.td
===================================================================
--- lib/Target/ARM/ARMScheduleR52.td
+++ lib/Target/ARM/ARMScheduleR52.td
@@ -220,12 +220,12 @@
       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
 
 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
-      (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
-      "t2MOVi", "t2MOV_ga_dyn")>;
+      (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi",
+      "t2MOVi")>;
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
       (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
 def : InstRW<[R52WriteLd,R52Read_ISS],
-      (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+      (instregex "MOV_ga_pcrel_ldr")>;
 
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
 
@@ -260,12 +260,12 @@
 
 // Sum of Absolute Difference
 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
-      (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
+      (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8") >;
 
 // Integer Multiply
 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
-      (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
-      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+      (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
 
@@ -273,17 +273,17 @@
 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
 // The store pipeline is used partly for 64-bit operations.
 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
-      (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
-      "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
+      (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+      "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
-      "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+      "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
-      "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB",
+      "UMAAL", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
 
@@ -304,31 +304,31 @@
       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
-      "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
+      "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
 
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
 
-def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri",
       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
 
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
-      "ANDS?rr", "BICS?rr", "CRC*", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
+      "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
 
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
-      "t2AD(|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
+      "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;
 
 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
-      "ORRrsrr", "RSBrsr", "RSCrsr", "SBCrsr")>;
+      "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
 
 def : InstRW<[R52WriteALU_EX1],
-    (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
+    (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;
 
 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
@@ -487,7 +487,7 @@
 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
-        (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+        (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
 
 // Integer Store, Single Element
 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
@@ -503,7 +503,7 @@
 
 // Integer Store, Dual
 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
-    (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
+    (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
 
@@ -511,7 +511,7 @@
     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
-    "PUSH", "tPUSH")>;
+    "tPUSH")>;
 
 // LDRLIT pseudo instructions, they expand to LDR + PICADD
 def : InstRW<[R52WriteLd],
Index: lib/Target/ARM/ARMScheduleSwift.td
===================================================================
--- lib/Target/ARM/ARMScheduleSwift.td
+++ lib/Target/ARM/ARMScheduleSwift.td
@@ -164,12 +164,12 @@
                           "t2UXTB16")>;
   // Pseudo instructions.
   def : InstRW<[SwiftWriteP01OneCycle2x],
-        (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
-                   "t2MOVi32imm", "t2MOV_ga_dyn")>;
+        (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi32imm",
+                   "t2MOVi32imm")>;
   def : InstRW<[SwiftWriteP01OneCycle3x],
         (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
   def : InstRW<[SwiftWriteP01OneCycle2x_load],
-        (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+        (instregex "MOV_ga_pcrel_ldr")>;
 
   def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
 
@@ -221,8 +221,8 @@
   // 4.2.12 Integer Multiply (32-bit result)
   // Two sources.
   def : InstRW< [SwiftWriteP0FourCycle],
-        (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
-        "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+        (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+        "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
         "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
         "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
 
@@ -244,8 +244,8 @@
   // Multiply accumulate, three sources
   def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
                  SwiftReadAdvanceFourCyclesPred],
-        (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
-        "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
+        (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+        "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
         "t2SMMLSR")>;
 
   // 4.2.13 Integer Multiply (32-bit result, Q flag)
@@ -305,9 +305,9 @@
   // We are being a bit inaccurate here.
   def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
                  SchedReadAdvance<4>, SchedReadAdvance<3>],
-        (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+        (instregex "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
         "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
-        "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT",
+        "UMAAL", "t2SMLAL", "t2UMLAL", "t2SMLALBB", "t2SMLALBT",
         "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
         "t2UMAAL")>;
 
@@ -369,7 +369,7 @@
         "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
   def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
         (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
-        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
+        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?")>;
 
   // 4.2.21 Integer Dual Load
   // Not accurate.
@@ -486,7 +486,7 @@
         (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
         "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
   def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
-        (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+        (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
   // 4.2.23 Integer Store, Single Element
   def : InstRW<[SwiftWriteP2],
         (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
@@ -536,7 +536,7 @@
         (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
   def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
         (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
-        "PUSH", "tPUSH")>;
+        "tPUSH")>;
 
   // LDRLIT pseudo instructions, they expand to LDR + PICADD
   def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU],
@@ -552,14 +552,14 @@
 
   // 4.2.27 Not issued
   def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-  def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
+  def : InstRW<[WriteNoop], (instregex "t2IT", "IT")>;
 
   // 4.2.28 Advanced SIMD, Integer, 2 cycle
   def : InstRW<[SwiftWriteP0TwoCycle],
         (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
                    "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
                    "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
-                   "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
+                   "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF",
                    "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
 
   def : InstRW<[SwiftWriteP1TwoCycle],
@@ -569,7 +569,7 @@
   // 4.2.30 Advanced SIMD, Integer with Accumulate
   def : InstRW<[SwiftWriteP0FourCycle],
         (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
-        "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
+        "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
         "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
         "VQSUB")>;
   def : InstRW<[SwiftWriteP1FourCycle],
@@ -626,12 +626,12 @@
   // 4.2.37 Advanced SIMD and VFP, Move
   def : InstRW<[SwiftWriteP0TwoCycle],
         (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
-                   "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
+                   "VMVNv", "VMVN(d|q)",
                    "FCONST(D|S)")>;
   def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
   def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
         (instregex "VQMOVN")>;
-  def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
+  def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN")>;
   def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
         (instregex "VDUP(8|16|32)")>;
   def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
Index: lib/Target/X86/X86ScheduleZnver1.td
===================================================================
--- lib/Target/X86/X86ScheduleZnver1.td
+++ lib/Target/X86/X86ScheduleZnver1.td
@@ -367,8 +367,7 @@
 // INC DEC NOT NEG.
 // m.
 def : InstRW<[WriteALULd],
-             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
-              "(INC|DEC)64(16|32)m")>;
+             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
 
 // MUL IMUL.
 // r16.
@@ -499,7 +498,7 @@
   let NumMicroOps = 2;
 }
 def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
-                            "IRET(D|Q)", "RETF")>;
+                            "IRET(16|32|64)")>;
 
 //-- Logic instructions --//
 
@@ -917,7 +916,7 @@
 def : InstRW<[ZnWriteToALU2], (instregex "VMOVPQIto64rr")>;
 
 // (x)mm <- r64.
-def : InstRW<[ZnWriteFPU2], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
+def : InstRW<[ZnWriteFPU2], (instregex "VMOV64toPQIrr")>;
 
 // (x)mm <- (x)mm.
 def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>;
Index: utils/TableGen/CodeGenSchedule.cpp
===================================================================
--- utils/TableGen/CodeGenSchedule.cpp
+++ utils/TableGen/CodeGenSchedule.cpp
@@ -104,6 +104,8 @@
         Regexpr = Regex(pat);
       }
 
+      int NumMatches = 0; // Instructions matched by this instregex pattern.
+
       unsigned NumGeneric = Target.getNumFixedInstructions();
       ArrayRef<const CodeGenInstruction *> Generics =
           Target.getInstructionsByEnumValue().slice(0, NumGeneric + 1);
@@ -112,8 +114,10 @@
       for (auto *Inst : Generics) {
         StringRef InstName = Inst->TheDef->getName();
         if (InstName.startswith(Prefix) &&
-            (!Regexpr || Regexpr->match(InstName.substr(Prefix.size()))))
+            (!Regexpr || Regexpr->match(InstName.substr(Prefix.size())))) {
           Elts.insert(Inst->TheDef);
+          ++NumMatches;
+        }
       }
 
       ArrayRef<const CodeGenInstruction *> Instructions =
@@ -137,13 +141,25 @@
       // a regex that needs to be checked.
       for (auto *Inst : make_range(Range)) {
         StringRef InstName = Inst->TheDef->getName();
-        if (!Regexpr || Regexpr->match(InstName.substr(Prefix.size())))
+        if (!Regexpr || Regexpr->match(InstName.substr(Prefix.size()))) {
           Elts.insert(Inst->TheDef);
+          ++NumMatches;
+        }
       }
+
+      // Reject patterns that select no instruction at all; such a pattern is
+      // dead weight and most likely a stale or misspelled instruction name.
+      if (NumMatches == 0)
+        PrintFatalError(Loc, "instregex has no matches: " + Original);
+#if 0 // TODO: Disabled for now; presumably existing models still trip this.
+      if (NumMatches == 1)
+        PrintFatalError(Loc,
+                        "instregex only matches one instruction: " + Original);
+#endif
     }
   }
 };
 
 } // end anonymous namespace
 
 /// CodeGenModels ctor interprets machine model records and populates maps.