Index: lib/Target/X86/InstPrinter/X86InstComments.cpp
===================================================================
--- lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -46,6 +46,16 @@
   CASE_AVX_INS_COMMON(Inst, Y, r##src) \
   CASE_SSE_INS_COMMON(Inst, r##src)
 
+#define CASE_MASK_MOVDUP(Inst, src) \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_MASKZ_MOVDUP(Inst, src) \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src) \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src) \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
+
 #define CASE_PMOVZX(Inst, src) \
   CASE_AVX512_INS_COMMON(Inst, Z, r##src) \
   CASE_AVX512_INS_COMMON(Inst, Z256, r##src) \
@@ -129,6 +139,48 @@
   }
 }
 
+/// Returns \p DestName followed by the AVX512 mask annotation for \p MI:
+/// " {%<mask reg>}", plus " {z}" for MASKZ opcodes; else just \p DestName.
+static std::string getMaskName(const MCInst *MI, const char *DestName,
+                               const char *(*getRegName)(unsigned)) {
+  // Index of the operand that carries the mask register for this opcode.
+  unsigned MaskOpIdx = 0;
+  bool IsMaskZ = false;
+
+  switch (MI->getOpcode()) {
+  default:
+    return std::string(DestName);
+  CASE_MASKZ_MOVDUP(MOVDDUP, m)
+  CASE_MASKZ_MOVDUP(MOVDDUP, r)
+  CASE_MASKZ_MOVDUP(MOVSHDUP, m)
+  CASE_MASKZ_MOVDUP(MOVSHDUP, r)
+  CASE_MASKZ_MOVDUP(MOVSLDUP, m)
+  CASE_MASKZ_MOVDUP(MOVSLDUP, r)
+    IsMaskZ = true;
+    MaskOpIdx = 1;
+    break;
+  CASE_MASK_MOVDUP(MOVDDUP, m)
+  CASE_MASK_MOVDUP(MOVDDUP, r)
+  CASE_MASK_MOVDUP(MOVSHDUP, m)
+  CASE_MASK_MOVDUP(MOVSHDUP, r)
+  CASE_MASK_MOVDUP(MOVSLDUP, m)
+  CASE_MASK_MOVDUP(MOVSLDUP, r)
+    MaskOpIdx = 2;
+    break;
+  }
+
+  const char *MaskRegName = getRegName(MI->getOperand(MaskOpIdx).getReg());
+
+  // MASK:  zmmX {%kY}
+  // MASKZ: zmmX {%kY} {z}
+  std::string Decorated(DestName);
+  Decorated.append(" {%").append(MaskRegName).append("}");
+  if (IsMaskZ)
+    Decorated.append(" {z}");
+
+  return Decorated;
+}
+
 //===----------------------------------------------------------------------===//
 // Top Level
Entrypoint //===----------------------------------------------------------------------===// @@ -753,9 +805,8 @@ if (ShuffleMask.empty()) return false; - // TODO: Add support for specifying an AVX512 style mask register in the comment. if (!DestName) DestName = Src1Name; - OS << (DestName ? DestName : "mem") << " = "; + OS << (DestName ? getMaskName(MI, DestName, getRegName) : "mem") << " = "; // If the two sources are the same, canonicalize the input elements to be // from the first src so that we get larger element spans. Index: test/CodeGen/X86/avx512-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -23,13 +23,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm1[0,0,2,2,4,4,6,6] +; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_movddup_pd: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovddup {{.*#+}} zmm0 = zmm1[0,0,2,2,4,4,6,6] +; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6] ; X64-NEXT: retq %arg1 = bitcast i8 %a1 to <8 x i1> %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> @@ -42,13 +42,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] +; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_movddup_pd: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] +; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; X64-NEXT: retq %arg0 = bitcast i8 %a0 to <8 x i1> %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> @@ -75,13 +75,13 @@ ; X32: # BB#0: ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax ; 
X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} zmm0 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_movehdup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovshdup {{.*#+}} zmm0 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; X64-NEXT: retq %arg1 = bitcast i16 %a1 to <16 x i1> %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> @@ -94,13 +94,13 @@ ; X32: # BB#0: ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_movehdup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; X64-NEXT: retq %arg0 = bitcast i16 %a0 to <16 x i1> %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> @@ -127,13 +127,13 @@ ; X32: # BB#0: ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} zmm0 = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_moveldup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovsldup {{.*#+}} zmm0 = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X64-NEXT: retq %arg1 = bitcast i16 %a1 to <16 x i1> %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> @@ -146,13 +146,13 @@ ; X32: # BB#0: ; 
X32-NEXT: movw {{[0-9]+}}(%esp), %ax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_moveldup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X64-NEXT: retq %arg0 = bitcast i16 %a0 to <16 x i1> %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -8,8 +8,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] -; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq @@ -28,8 +28,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] -; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vaddps 
%zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq @@ -48,8 +48,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6] -; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] +; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] +; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -29,7 +29,7 @@ ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0] +; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -39,7 +39,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0] +; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] ; X64-NEXT: retq %trn1 = trunc i8 %a1 to i2 %arg1 = bitcast i2 %trn1 to <2 x i1> @@ -59,7 +59,7 @@ ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -69,7 +69,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; X64-NEXT: retq %trn1 = trunc i8 %a0 to i2 %arg0 = bitcast i2 %trn1 to <2 x i1> @@ -103,7 +103,7 @@ 
; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm1[0,0,2,2] +; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -113,7 +113,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm1[0,0,2,2] +; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] ; X64-NEXT: retq %trn1 = trunc i8 %a1 to i4 %arg1 = bitcast i4 %trn1 to <4 x i1> @@ -133,7 +133,7 @@ ; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] +; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -143,7 +143,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] +; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; X64-NEXT: retq %trn1 = trunc i8 %a0 to i4 %arg0 = bitcast i4 %trn1 to <4 x i1> @@ -177,7 +177,7 @@ ; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] +; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -187,7 +187,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] +; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] ; X64-NEXT: retq %trn1 = trunc i8 %a1 to i4 %arg1 = bitcast i4 %trn1 to <4 x i1> @@ -207,7 +207,7 @@ ; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; X32-NEXT: popl %eax ; X32-NEXT: 
retl ; @@ -217,7 +217,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; X64-NEXT: retq %trn0 = trunc i8 %a0 to i4 %arg0 = bitcast i4 %trn0 to <4 x i1> @@ -245,13 +245,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm1[1,1,3,3,5,5,7,7] +; X32-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_mask_movehdup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm1[1,1,3,3,5,5,7,7] +; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7] ; X64-NEXT: retq %arg1 = bitcast i8 %a1 to <8 x i1> %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> @@ -264,13 +264,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] +; X32-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_movehdup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] +; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; X64-NEXT: retq %arg0 = bitcast i8 %a0 to <8 x i1> %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> @@ -303,7 +303,7 @@ ; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} xmm0 = xmm1[0,0,2,2] +; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -313,7 +313,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm1[0,0,2,2] +; X64-NEXT: vmovsldup {{.*#+}} xmm0 
{%k1} = xmm1[0,0,2,2] ; X64-NEXT: retq %trn1 = trunc i8 %a1 to i4 %arg1 = bitcast i4 %trn1 to <4 x i1> @@ -333,7 +333,7 @@ ; X32-NEXT: movb %al, (%esp) ; X32-NEXT: movzbl (%esp), %eax ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] +; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; X32-NEXT: popl %eax ; X32-NEXT: retl ; @@ -343,7 +343,7 @@ ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: kmovw %eax, %k1 -; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] +; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; X64-NEXT: retq %trn0 = trunc i8 %a0 to i4 %arg0 = bitcast i4 %trn0 to <4 x i1> @@ -371,13 +371,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm1[0,0,2,2,4,4,6,6] +; X32-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_mask_moveldup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm1[0,0,2,2,4,4,6,6] +; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6] ; X64-NEXT: retq %arg1 = bitcast i8 %a1 to <8 x i1> %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> @@ -390,13 +390,13 @@ ; X32: # BB#0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] +; X32-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_maskz_moveldup_ps: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] +; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; X64-NEXT: retq %arg0 = bitcast i8 %a0 to <8 x i1> %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== 
--- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -10,9 +10,9 @@ ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8] -; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0] -; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2] +; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -33,9 +33,9 @@ ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8] -; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0] -; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6] +; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -56,9 +56,9 @@ ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8] -; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0] -; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3] +; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: 
vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -79,9 +79,9 @@ ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8] -; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0] -; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7] +; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -101,9 +101,9 @@ ; CHECK-NEXT: ## xmm2 = xmm0[0,0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8] -; CHECK-NEXT: ## xmm1 = xmm0[0,0] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0] -; CHECK-NEXT: ## xmm0 = xmm0[0,0] +; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0] ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca] ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -124,9 +124,9 @@ ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8] -; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0] -; CHECK-NEXT: ## ymm0 = 
ymm0[0,0,2,2] +; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca] ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] Index: test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -53,7 +53,7 @@ ; CHECK-LABEL: combine_vpermt2var_8f64_movddup_mask: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6] +; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> , <8 x double> %x0, <8 x double> %x1, i8 %m) ret <8 x double> %res0 @@ -169,7 +169,7 @@ ; CHECK-LABEL: combine_vpermt2var_16f32_vmovshdup_mask: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) ret <16 x float> %res0 @@ -196,7 +196,7 @@ ; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup_mask: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) ret <16 x float> %res0 @@ -205,7 +205,7 @@ ; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup_mask_load: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %esi, 
%k1 -; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %x0 = load <16 x float>, <16 x float> *%p0 %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) Index: utils/update_llc_test_checks.py =================================================================== --- utils/update_llc_test_checks.py +++ utils/update_llc_test_checks.py @@ -32,7 +32,7 @@ SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) SCRUB_X86_SHUFFLES_RE = ( re.compile( - r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$', + r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', flags=re.M)) SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)') SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)') Index: utils/update_test_checks.py =================================================================== --- utils/update_test_checks.py +++ utils/update_test_checks.py @@ -47,7 +47,7 @@ SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) SCRUB_X86_SHUFFLES_RE = ( re.compile( - r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem) = .*)$', + r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', flags=re.M)) SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)') SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')