diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2139,6 +2139,13 @@
       return C->isExactlyValue(1.0);
     }
   }
+  if (ConstantSDNode *C = isConstOrConstSplat(V)) {
+    switch (Opcode) {
+    case ISD::ADD: // X + 0 --> X
+    case ISD::SUB: // X - 0 --> X
+      return C->isZero();
+    }
+  }
   return false;
 };
@@ -3321,9 +3328,15 @@
   EVT VT = N0.getValueType();
   SDLoc DL(N);
 
+  auto PeekThroughFreeze = [](SDValue N) {
+    if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+      return N->getOperand(0);
+    return N;
+  };
+
   // fold (sub x, x) -> 0
   // FIXME: Refactor this and xor and other similar operations together.
-  if (N0 == N1)
+  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
 
   // fold (sub c1, c2) -> c3
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -32,9 +32,8 @@
 ; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x4c,0x24,0x01]
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc1]
-; X86-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd1]
-; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
@@ -42,9 +41,8 @@
 ; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
-; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
-; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xc1]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
@@ -63,9 +61,8 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
-; X86-NEXT:    vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
-; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
@@ -73,9 +70,8 @@
 ; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
-; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
-; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xc1]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
@@ -4099,9 +4095,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
-; X86-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
+; X86-NEXT:    vprold $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x04]
 ; X86-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4109,9 +4105,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
-; X64-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
+; X64-NEXT:    vprold $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x04]
 ; X64-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
@@ -4134,9 +4130,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
-; X86-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
+; X86-NEXT:    vprolq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x04]
 ; X86-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4144,9 +4140,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
-; X64-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
+; X64-NEXT:    vprolq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x04]
 ; X64-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
@@ -4168,9 +4164,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
-; X86-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
+; X86-NEXT:    vprord $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x04]
 ; X86-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4178,9 +4174,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
-; X64-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
+; X64-NEXT:    vprord $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x04]
 ; X64-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
@@ -4203,9 +4199,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
-; X86-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
+; X86-NEXT:    vprorq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x04]
 ; X86-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4213,9 +4209,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
-; X64-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
+; X64-NEXT:    vprorq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x04]
 ; X64-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
@@ -4239,9 +4235,10 @@
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
 ; X86-NEXT:    vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
-; X86-NEXT:    vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
-; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xca]
+; X86-NEXT:    vpsrlq $6, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x06]
+; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xc8]
+; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
@@ -4249,9 +4246,10 @@
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
 ; X64-NEXT:    vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
-; X64-NEXT:    vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
-; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xca]
+; X64-NEXT:    vpsrlq $6, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x06]
+; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xc8]
+; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
@@ -4269,9 +4267,10 @@
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
 ; X86-NEXT:    vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
-; X86-NEXT:    vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
-; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
+; X86-NEXT:    vpsrld $6, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x06]
+; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xc8]
+; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
@@ -4279,9 +4278,10 @@
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
 ; X64-NEXT:    vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
-; X64-NEXT:    vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
-; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
+; X64-NEXT:    vpsrld $6, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x06]
+; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xc8]
+; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
@@ -4298,9 +4298,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
-; X86-NEXT:    vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
+; X86-NEXT:    vpsrad $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x04]
 ; X86-NEXT:    vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4308,9 +4308,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
-; X64-NEXT:    vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
+; X64-NEXT:    vpsrad $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x04]
 ; X64-NEXT:    vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
@@ -4329,9 +4329,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
-; X86-NEXT:    vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
+; X86-NEXT:    vpsraq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x04]
 ; X86-NEXT:    vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4339,9 +4339,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
-; X64-NEXT:    vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
+; X64-NEXT:    vpsraq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x04]
 ; X64-NEXT:    vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
@@ -4359,9 +4359,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
-; X86-NEXT:    vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
+; X86-NEXT:    vpslld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x04]
 ; X86-NEXT:    vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4369,9 +4369,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
-; X64-NEXT:    vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
+; X64-NEXT:    vpslld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x04]
 ; X64-NEXT:    vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
@@ -4390,9 +4390,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
-; X86-NEXT:    vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
+; X86-NEXT:    vpsllq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x04]
 ; X86-NEXT:    vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -4400,9 +4400,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
-; X64-NEXT:    vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
+; X64-NEXT:    vpsllq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x04]
 ; X64-NEXT:    vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
@@ -6519,24 +6519,22 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
-; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
+; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
-; X86-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
-; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
-; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
+; X86-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xca]
+; X86-NEXT:    vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
+; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
-; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
+; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
-; X64-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
-; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
-; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
+; X64-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xca]
+; X64-NEXT:    vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
+; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xc2]
 ; X64-NEXT:    retq ## encoding: [0xc3]
 
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
@@ -9718,9 +9716,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
-; X86-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
+; X86-NEXT:    vprold $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x04]
 ; X86-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -9728,9 +9726,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
-; X64-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
+; X64-NEXT:    vprold $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x04]
 ; X64-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
@@ -9749,9 +9747,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
-; X86-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
+; X86-NEXT:    vprolq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x04]
 ; X86-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -9759,9 +9757,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
-; X64-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
+; X64-NEXT:    vprolq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x04]
 ; X64-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
@@ -9779,9 +9777,9 @@
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
-; X86-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
+; X86-NEXT:    vprord $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x04]
 ; X86-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
-; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -9789,9 +9787,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
-; X64-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
+; X64-NEXT:    vprord $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x04]
 ; X64-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
-; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
+; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0xfe,0xca]
 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
@@ -9810,9 +9808,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
-; X86-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
+; X86-NEXT:    vprorq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x04]
 ; X86-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
-; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT:    retl ## encoding: [0xc3]
 ;
@@ -9820,9 +9818,9 @@
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; X64-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
-; X64-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
+; X64-NEXT:    vprorq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x04]
 ; X64-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
-; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
+; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xd4,0xca]
 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll
--- a/llvm/test/CodeGen/X86/avx512-rotate.ll
+++ b/llvm/test/CodeGen/X86/avx512-rotate.ll
@@ -17,9 +17,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprold $6, %zmm0, %zmm2
 ; KNL-NEXT:    vprold $7, %zmm0, %zmm0
+; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -27,9 +27,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprold $6, %zmm0, %zmm2
 ; SKX-NEXT:    vprold $7, %zmm0, %zmm0
+; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2)
@@ -45,9 +45,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprolq $6, %zmm0, %zmm2
 ; KNL-NEXT:    vprolq $7, %zmm0, %zmm0
+; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -55,9 +55,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprolq $6, %zmm0, %zmm2
 ; SKX-NEXT:    vprolq $7, %zmm0, %zmm0
+; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2)
@@ -73,9 +73,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprord $6, %zmm0, %zmm2
 ; KNL-NEXT:    vprord $7, %zmm0, %zmm0
+; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -83,9 +83,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprord $6, %zmm0, %zmm2
 ; SKX-NEXT:    vprord $7, %zmm0, %zmm0
+; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2)
@@ -101,9 +101,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprorq $6, %zmm0, %zmm2
 ; KNL-NEXT:    vprorq $7, %zmm0, %zmm0
+; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -111,9 +111,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprorq $6, %zmm0, %zmm2
 ; SKX-NEXT:    vprorq $7, %zmm0, %zmm0
+; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2)
@@ -131,9 +131,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprold $31, %zmm0, %zmm2
 ; KNL-NEXT:    vprold $30, %zmm0, %zmm0
+; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -141,9 +141,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprold $31, %zmm0, %zmm2
 ; SKX-NEXT:    vprold $30, %zmm0, %zmm0
+; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2)
@@ -159,9 +159,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprolq $1, %zmm0, %zmm2
 ; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
+; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -169,9 +169,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprolq $1, %zmm0, %zmm2
 ; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
+; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2)
@@ -187,9 +187,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprord $31, %zmm0, %zmm2
 ; KNL-NEXT:    vprord $30, %zmm0, %zmm0
+; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -197,9 +197,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprord $31, %zmm0, %zmm2
 ; SKX-NEXT:    vprord $30, %zmm0, %zmm0
+; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2)
@@ -215,9 +215,9 @@
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    vprorq $1, %zmm0, %zmm2
 ; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
+; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -225,9 +225,9 @@
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    vprorq $1, %zmm0, %zmm2
 ; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
+; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1 {%k1}
 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -52,9 +52,8 @@
 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
 ; X86-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
-; X86-NEXT:    vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
-; X86-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
 ; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
+; X86-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfc,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
@@ -62,9 +61,8 @@
 ; X64-NEXT:    vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
-; X64-NEXT:    vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
-; X64-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
 ; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
+; X64-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfc,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
@@ -81,9 +79,8 @@
 ; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02]
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1]
-; X86-NEXT:    vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1]
-; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
@@ -91,9 +88,8 @@
 ; X64-NEXT:    vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
-; X64-NEXT:    vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
-; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
@@ -1095,9 +1091,10 @@
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
 ; X86-NEXT:    vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04]
-; X86-NEXT:    vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05]
-; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
-; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xca]
+; X86-NEXT:    vpsrlw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x05]
+; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xc8]
+; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
@@ -1105,9 +1102,10 @@
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
 ; X64-NEXT:    vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04]
-; X64-NEXT:    vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05]
-; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
-; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xca]
+; X64-NEXT:    vpsrlw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x05]
+; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xc8]
+; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1)
@@ -1169,9 +1167,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
-; X86-NEXT:    vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04]
+; X86-NEXT:    vpsraw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x04]
 ; X86-NEXT:    vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05]
-; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xca]
 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -1179,9 +1177,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
-; X64-NEXT:    vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04]
+; X64-NEXT:    vpsraw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x04]
 ; X64-NEXT:    vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05]
-; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xca]
 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
@@ -1244,9 +1242,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
-; X86-NEXT:    vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04]
+; X86-NEXT:    vpsllw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x04]
 ; X86-NEXT:    vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05]
-; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xca]
 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -1254,9 +1252,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
-; X64-NEXT:    vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04]
+; X64-NEXT:    vpsllw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x04]
 ; X64-NEXT:    vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05]
-; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0xfd,0xca]
 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
@@ -3008,9 +3006,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
+; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x03]
 ; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
-; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x49,0xfd,0xd3]
 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -3018,9 +3016,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
+; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x03]
 ; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
-; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x49,0xfd,0xd3]
 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1243,9 +1243,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
+; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x03]
 ; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
-; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x49,0xfd,0xd3]
 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -1253,9 +1253,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
+; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x03]
 ; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
-; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x49,0xfd,0xd3]
 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -10,9 +10,8 @@
 ; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04]
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1]
-; X86-NEXT:    vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1]
-; X86-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
+; X86-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfc,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
@@ -20,9 +19,8 @@
 ; X64-NEXT:    vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf]
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
-; X64-NEXT:    vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
-; X64-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
+; X64-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfc,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
@@ -42,9 +40,8 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
-; X86-NEXT:    vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1]
-; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
+; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
@@ -52,9 +49,8 @@
 ; X64-NEXT:    vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf]
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
-; X64-NEXT:    vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
-; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
+; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
@@ -73,9 +69,8 @@
 ; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04]
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1]
-; X86-NEXT:    vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1]
-; X86-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
 ; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
+; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfc,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
@@ -83,9 +78,8 @@
 ; X64-NEXT:    vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf]
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
-; X64-NEXT:    vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
-; X64-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
 ; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
+; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfc,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
@@ -105,9 +99,8 @@
 ; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04]
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1]
-; X86-NEXT:    vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1]
-; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
+; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
@@ -115,9 +108,8 @@
 ; X64-NEXT:    vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf]
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
-; X64-NEXT:    vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
-; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
+; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
@@ -3158,9 +3150,10 @@
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
 ; X86-NEXT:    vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04]
-; X86-NEXT:    vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05]
-; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
-; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
+; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
+; X86-NEXT:    vpsrlw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x05]
+; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xc8]
+; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
@@ -3168,9 +3161,10 @@
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
 ; X64-NEXT:    vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04]
-; X64-NEXT:    vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05]
-; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
-; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
+; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
+; X64-NEXT:    vpsrlw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x05]
+; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xc8]
+; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1)
@@ -3188,9 +3182,10 @@
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
 ; X86-NEXT:    vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04]
-; X86-NEXT:    vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05]
-; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
-; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
+; X86-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xca]
+; X86-NEXT:    vpsrlw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x05]
+; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xc8]
+; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
@@ -3198,9 +3193,10 @@
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
 ; X64-NEXT:    vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04]
-; X64-NEXT:    vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05]
-; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
-; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
+; X64-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xca]
+; X64-NEXT:    vpsrlw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x05]
+; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xc8]
+; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1)
@@ -3218,9 +3214,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
-; X86-NEXT:    vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04]
+; X86-NEXT:    vpsraw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x04]
 ; X86-NEXT:    vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05]
-; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xca]
 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -3228,9 +3224,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
-; X64-NEXT:    vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04]
+; X64-NEXT:    vpsraw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x04]
 ; X64-NEXT:    vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05]
-; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xca]
 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
@@ -3248,9 +3244,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
-; X86-NEXT:    vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04]
+; X86-NEXT:    vpsraw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x04]
 ; X86-NEXT:    vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05]
-; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; X86-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xca]
 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -3258,9 +3254,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
-; X64-NEXT:    vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04]
+; X64-NEXT:    vpsraw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x04]
 ; X64-NEXT:    vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05]
-; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; X64-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xca]
 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
@@ -3279,9 +3275,9 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
-; X86-NEXT:    vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04]
+; X86-NEXT:    vpsllw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x04]
 ; X86-NEXT:    vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05]
-; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xca]
 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -3289,9 +3285,9 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
 ; X64-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
-; X64-NEXT:    vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04]
+; X64-NEXT:    vpsllw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x04]
 ; X64-NEXT:    vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05]
-; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfd,0xca]
 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
@@ -3309,9 +3305,9 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
-; X86-NEXT:    vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04]
+; X86-NEXT:    vpsllw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x04]
 ; X86-NEXT:    vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05]
-; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; X86-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xca]
 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -3319,9 +3315,9 @@
@@ ; X64: # %bb.0: ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x04] ; X64-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X64-NEXT: vpaddw %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfd,0xca] ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) @@ -6931,20 +6927,20 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] -; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] +; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x03] ; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfd,0xd3] +; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] -; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] +; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x03] ; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfd,0xd3] +; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4) @@ -6961,9 +6957,9 @@ ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] -; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] +; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x03] ; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # 
encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x29,0xfd,0xd3] ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -6971,9 +6967,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] -; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] +; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x03] ; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x29,0xfd,0xd3] ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1996,20 +1996,20 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] -; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] +; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x03] ; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfd,0xd3] +; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] -; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] +; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x03] ; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfd,0xd3] +; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2) %2 = 
bitcast i8 %x4 to <8 x i1> @@ -2030,9 +2030,9 @@ ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] -; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] +; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x03] ; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x29,0xfd,0xd3] ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2040,9 +2040,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] -; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] +; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x03] ; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x29,0xfd,0xd3] ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll @@ -675,8 +675,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18] -; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512: @@ -684,8 +685,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f] -; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] 
%x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -714,8 +716,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18] -; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2] -; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2] +; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512: @@ -723,8 +726,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f] -; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2] -; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i64>, <8 x i64>* %x2p %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) @@ -783,8 +787,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18] -; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512: @@ -792,8 +797,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f] -; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -822,8 +828,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: 
[0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18] -; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2] -; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2] +; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512: @@ -831,8 +838,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f] -; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2] -; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i64>, <8 x i64>* %x2p %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) @@ -860,8 +868,9 @@ ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18] -; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2] +; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512: @@ -869,8 +878,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f] -; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2] +; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <32 x i16>, <32 x i16>* %x2p %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll @@ -520,8 +520,9 @@ ; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18] -; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512: @@ -529,8 +530,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f] -; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) @@ -551,8 +553,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18] -; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2] -; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2] +; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512: @@ -560,8 +563,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f] -; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2] -; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i64>, <8 x i64>* %x2p %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2) @@ -581,8 +585,9 @@ ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18] -; X86-NEXT: vpshrdvw %zmm2, 
%zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X86-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2] +; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512: @@ -590,8 +595,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f] -; X64-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X64-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2] +; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <32 x i16>, <32 x i16>* %x2p %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) @@ -611,8 +617,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18] -; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512: @@ -620,8 +627,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f] -; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -642,8 +650,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18] -; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2] -; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2] +; X86-NEXT: vpaddq %zmm0, 
%zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512: @@ -651,8 +660,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f] -; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2] -; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] +; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2] +; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x49,0xd4,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i64>, <8 x i64>* %x2p %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) @@ -672,8 +682,9 @@ ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18] -; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2] +; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512: @@ -681,8 +692,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f] -; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] +; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2] +; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfd,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <32 x i16>, <32 x i16>* %x2p %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll @@ -732,9 +732,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: @@ -742,9 +743,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1) @@ -881,9 +883,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: @@ -891,9 +894,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, 
%xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1) @@ -1030,9 +1034,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2: @@ -1040,9 +1045,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) %2 = bitcast i8 %x4 to <8 x i1> @@ -1197,9 +1203,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: 
retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2: @@ -1207,9 +1214,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) %2 = bitcast i8 %x4 to <8 x i1> @@ -1377,8 +1385,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18] -; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: @@ -1386,8 +1395,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f] -; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -1416,8 +1426,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18] -; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: 
[0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: @@ -1425,8 +1436,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f] -; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) @@ -1455,8 +1467,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18] -; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: @@ -1464,8 +1477,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f] -; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i64>, <4 x i64>* %x2p %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) @@ -1494,8 +1508,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18] -; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: @@ -1503,8 +1518,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f] -; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <2 x i64>, <2 x i64>* %x2p %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) @@ -1532,8 +1548,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18] -; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] +; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: @@ -1541,8 +1558,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f] -; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] +; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i16>, <16 x i16>* %x2p %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) @@ -1571,8 +1589,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18] -; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] +; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: 
retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: @@ -1580,8 +1599,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f] -; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] +; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i16>, <8 x i16>* %x2p %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) @@ -1610,8 +1630,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18] -; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: @@ -1619,8 +1640,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f] -; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -1649,8 +1671,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18] -; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: @@ 
-1658,8 +1681,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f] -; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) @@ -1688,8 +1712,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18] -; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: @@ -1697,8 +1722,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f] -; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i64>, <4 x i64>* %x2p %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) @@ -1727,8 +1753,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18] -; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: @@ -1736,8 +1763,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; 
X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f] -; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <2 x i64>, <2 x i64>* %x2p %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) @@ -1765,8 +1793,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18] -; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] +; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: @@ -1774,8 +1803,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f] -; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] +; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i16>, <16 x i16>* %x2p %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) @@ -1804,8 +1834,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18] -; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] +; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: @@ -1813,8 +1844,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f] -; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] +; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i16>, <8 x i16>* %x2p %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll @@ -738,9 +738,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: @@ -748,9 +749,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] ; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] -; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) %2 = bitcast i8 %x4 to <8 x i1> @@ -899,9 +901,10 @@ ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X86-NEXT: vpaddd %xmm0, 
%xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: @@ -909,9 +912,10 @@ ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] ; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] -; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xd3] +; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xc1,0x18] +; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm2 {%k1} # encoding: [0x62,0xf1,0x6d,0x09,0xfe,0xd0] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) %2 = bitcast i8 %x4 to <8 x i1> @@ -1061,8 +1065,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18] -; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: @@ -1070,8 +1075,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f] -; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2) @@ -1092,8 +1098,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, 
%xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18] -; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: @@ -1101,8 +1108,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f] -; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2) @@ -1125,8 +1133,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18] -; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: @@ -1134,8 +1143,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f] -; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i64>, <4 x i64>* %x2p %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2) @@ -1158,8 +1168,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: 
[0x62,0xf2,0xf5,0x09,0x73,0x18] -; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: @@ -1167,8 +1178,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f] -; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <2 x i64>, <2 x i64>* %x2p %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2) @@ -1221,8 +1233,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18] -; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] +; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: @@ -1230,8 +1243,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f] -; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] +; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfd,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i16>, <8 x i16>* %x2p %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) @@ -1252,8 +1266,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18] -; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] -; X86-NEXT: 
vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: @@ -1261,8 +1276,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f] -; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -1283,8 +1299,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18] -; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: @@ -1292,8 +1309,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f] -; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -1316,8 +1334,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18] -; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] -; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: 
[0x62,0xf2,0xf5,0x28,0x71,0xc2] +; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: @@ -1325,8 +1344,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f] -; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] -; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] +; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] +; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x29,0xd4,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i64>, <4 x i64>* %x2p %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) @@ -1349,8 +1369,9 @@ ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18] -; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] -; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] +; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: @@ -1358,8 +1379,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f] -; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] -; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] +; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] +; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0xe5,0x09,0xd4,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <2 x i64>, <2 x i64>* %x2p %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) @@ -1381,8 +1403,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18] -; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] +; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] 
+; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: @@ -1390,8 +1413,9 @@ ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f] -; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] +; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] +; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfd,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i16>, <16 x i16>* %x2p %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -11,9 +11,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqa32 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd1] -; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128: @@ -21,9 +20,8 @@ ; X64-NEXT: vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7] -; X64-NEXT: vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7] -; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask) @@ -43,9 +41,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqa64 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xd1] -; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd4,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; 
X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128: @@ -53,9 +50,8 @@ ; X64-NEXT: vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7] -; X64-NEXT: vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7] -; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd4,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask) @@ -75,9 +71,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqa32 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256: @@ -85,9 +80,8 @@ ; X64-NEXT: vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7] -; X64-NEXT: vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7] -; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask) @@ -106,9 +100,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqa64 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd4,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256: @@ -116,9 +109,8 @@ ; X64-NEXT: vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7] -; X64-NEXT: vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7] -; 
X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd4,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask) @@ -6859,9 +6851,10 @@ ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03] ; X86-NEXT: vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04] -; X86-NEXT: vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xca] +; X86-NEXT: vpsrlq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x05] +; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128: @@ -6869,9 +6862,10 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03] ; X64-NEXT: vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04] -; X64-NEXT: vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] -; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xca] +; X64-NEXT: vpsrlq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x05] +; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 4, <2 x i64> %x2, i8 -1) @@ -6890,9 +6884,10 @@ ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03] ; X86-NEXT: vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04] -; X86-NEXT: vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xca] +; X86-NEXT: vpsrlq $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x05] +; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm1 {%k1} # 
encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256: @@ -6900,9 +6895,10 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03] ; X64-NEXT: vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04] -; X64-NEXT: vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] -; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xca] +; X64-NEXT: vpsrlq $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x05] +; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 4, <4 x i64> %x2, i8 -1) @@ -6921,9 +6917,10 @@ ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03] ; X86-NEXT: vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04] -; X86-NEXT: vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] +; X86-NEXT: vpsrld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x05] +; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128: @@ -6931,9 +6928,10 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03] ; X64-NEXT: vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04] -; X64-NEXT: vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] -; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] +; X64-NEXT: vpsrld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x05] +; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> 
@llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 4, <4 x i32> %x2, i8 -1) @@ -6952,9 +6950,10 @@ ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03] ; X86-NEXT: vpsrld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04] -; X86-NEXT: vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xca] +; X86-NEXT: vpsrld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x05] +; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256: @@ -6962,9 +6961,10 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03] ; X64-NEXT: vpsrld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04] -; X64-NEXT: vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] -; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xca] +; X64-NEXT: vpsrld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x05] +; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 4, <8 x i32> %x2, i8 -1) @@ -6982,9 +6982,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] -; X86-NEXT: vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04] +; X86-NEXT: vpslld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x04] ; X86-NEXT: vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -6992,9 +6992,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] -; X64-NEXT: vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04] +; X64-NEXT: vpslld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: 
[0xc5,0xe9,0x72,0xf0,0x04] ; X64-NEXT: vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) @@ -7013,9 +7013,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] -; X86-NEXT: vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04] +; X86-NEXT: vpslld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x04] ; X86-NEXT: vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -7023,9 +7023,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] -; X64-NEXT: vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04] +; X64-NEXT: vpslld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x04] ; X64-NEXT: vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) @@ -8528,9 +8528,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] -; X86-NEXT: vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04] +; X86-NEXT: vpsraq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x04] ; X86-NEXT: vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -8538,9 +8538,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] -; X64-NEXT: vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04] +; X64-NEXT: vpsraq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x04] ; 
X64-NEXT: vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) @@ -8559,9 +8559,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] -; X86-NEXT: vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04] +; X86-NEXT: vpsraq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x04] ; X86-NEXT: vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -8569,9 +8569,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] -; X64-NEXT: vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04] +; X64-NEXT: vpsraq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x04] ; X64-NEXT: vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) @@ -13929,9 +13929,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] +; X86-NEXT: vprold $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x04] ; X86-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -13939,9 +13939,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] +; X64-NEXT: vprold $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x04] ; X64-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] -; X64-NEXT: 
vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) @@ -13960,9 +13960,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] +; X86-NEXT: vprold $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x04] ; X86-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -13970,9 +13970,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] +; X64-NEXT: vprold $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x04] ; X64-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) @@ -13991,9 +13991,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] +; X86-NEXT: vprolq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x04] ; X86-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14001,9 +14001,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] +; X64-NEXT: vprolq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x04] ; X64-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; 
X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) @@ -14022,9 +14022,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] +; X86-NEXT: vprolq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x04] ; X86-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14032,9 +14032,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] +; X64-NEXT: vprolq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x04] ; X64-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) @@ -14241,9 +14241,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] +; X86-NEXT: vprord $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x04] ; X86-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14251,9 +14251,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] +; X64-NEXT: vprord $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x04] ; X64-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; 
X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) @@ -14272,9 +14272,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] +; X86-NEXT: vprord $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x04] ; X86-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14282,9 +14282,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] +; X64-NEXT: vprord $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x04] ; X64-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) @@ -14303,9 +14303,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] +; X86-NEXT: vprorq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x04] ; X86-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14313,9 +14313,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] +; X64-NEXT: vprorq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x04] ; X64-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; 
X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) @@ -14334,9 +14334,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] +; X86-NEXT: vprorq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x04] ; X86-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14344,9 +14344,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] +; X64-NEXT: vprorq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x04] ; X64-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) @@ -14575,9 +14575,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] +; X86-NEXT: vprold $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x04] ; X86-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14585,9 +14585,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04] +; X64-NEXT: vprold $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x04] ; X64-NEXT: vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, 
i32 3) @@ -14612,9 +14612,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] +; X86-NEXT: vprold $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x04] ; X86-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14622,9 +14622,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04] +; X64-NEXT: vprold $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x04] ; X64-NEXT: vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3) @@ -14647,9 +14647,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X86-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] +; X86-NEXT: vprolq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x04] ; X86-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14657,9 +14657,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] -; X64-NEXT: vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04] +; X64-NEXT: vprolq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x04] ; X64-NEXT: vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3) @@ -14684,9 +14684,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: 
[0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X86-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] +; X86-NEXT: vprolq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x04] ; X86-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14694,9 +14694,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] -; X64-NEXT: vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04] +; X64-NEXT: vprolq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x04] ; X64-NEXT: vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3) @@ -14931,9 +14931,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] +; X86-NEXT: vprord $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x04] ; X86-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] -; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14941,9 +14941,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04] +; X64-NEXT: vprord $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x04] ; X64-NEXT: vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05] -; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0xfe,0xca] ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3) @@ -14968,9 +14968,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprord $4, %ymm0, %ymm2 {%k1} 
{z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] +; X86-NEXT: vprord $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x04] ; X86-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] -; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -14978,9 +14978,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04] +; X64-NEXT: vprord $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x04] ; X64-NEXT: vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05] -; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0xfe,0xca] ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3) @@ -15003,9 +15003,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X86-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] +; X86-NEXT: vprorq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x04] ; X86-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -15013,9 +15013,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] -; X64-NEXT: vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04] +; X64-NEXT: vprorq $4, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x04] ; X64-NEXT: vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05] -; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0xd4,0xca] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3) @@ -15040,9 +15040,9 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X86-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] +; X86-NEXT: vprorq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x04] ; X86-NEXT: 
vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -15050,9 +15050,9 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] -; X64-NEXT: vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04] +; X64-NEXT: vprorq $4, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x04] ; X64-NEXT: vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05] -; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0xd4,0xca] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3) diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll @@ -22,8 +22,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18] -; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: @@ -31,8 +32,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x1f] -; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -61,8 +63,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18] -; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # 
encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: @@ -70,8 +73,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x1f] -; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) @@ -100,8 +104,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18] -; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: @@ -109,8 +114,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x1f] -; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -139,8 +145,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18] -; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: @@ -148,8 +155,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x1f] -; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) @@ -178,8 +186,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18] -; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: @@ -187,8 +196,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x1f] -; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -217,8 +227,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18] -; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 
# encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: @@ -226,8 +237,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x1f] -; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) @@ -257,8 +269,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18] -; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: @@ -266,8 +279,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x1f] -; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) @@ -296,8 +310,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18] -; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 
{%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: @@ -305,8 +320,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x1f] -; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll @@ -21,8 +21,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18] -; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: @@ -30,8 +31,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x1f] -; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -63,8 +65,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18] -; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: 
vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: @@ -72,8 +75,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x1f] -; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -107,8 +111,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18] -; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: @@ -116,8 +121,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x1f] -; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -149,8 +155,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18] -; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} 
# encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: @@ -158,8 +165,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x1f] -; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -193,8 +201,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18] -; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: @@ -202,8 +211,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x1f] -; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -235,8 +245,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18] -; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: @@ -244,8 +255,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x1f] -; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -279,8 +291,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18] -; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2] +; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: @@ -288,8 +301,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x1f] -; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2] +; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0xfe,0xd8] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -328,8 +342,9 @@ ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18] -; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2] +; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: @@ 
-337,8 +352,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x1f] -; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2] +; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0xfe,0xd8] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll @@ -21,8 +21,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18] -; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: @@ -30,8 +31,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f] -; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -59,8 +61,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18] -; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # 
encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: @@ -68,8 +71,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f] -; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -97,8 +101,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18] -; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: @@ -106,8 +111,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f] -; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -135,8 +141,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18] -; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: @@ -144,8 +151,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, 
%zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f] -; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll @@ -20,8 +20,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18] -; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: @@ -29,8 +30,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f] -; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -61,8 +63,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18] -; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: @@ -70,8 +73,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 
%zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f] -; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -102,8 +106,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18] -; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: @@ -111,8 +116,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f] -; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -143,8 +149,9 @@ ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18] -; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2] +; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: @@ -152,8 +159,9 @@ ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f] -; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2] +; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0xfe,0xd8] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
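
A note on the recurring pattern in the regenerated checks above (illustrative commentary, not part of the patch itself): in every hunk, a `{%k1} {z}` zero-masked operation feeding a plain `vpaddd`/`vpaddq` is replaced by the unmasked operation followed by a single merge-masked add, plus a trailing `vmovdqa`/`vmovdqa64` to place the result in the return register for the three-operand VNNI intrinsics. A reduced standalone IR sketch of that shape, with a hypothetical function name not taken from the patch:

; Hypothetical reduced test, not from this patch: an add whose second operand
; is a zero-masked value. With AVX-512 enabled this is expected to select one
; merge-masked add (e.g. vpaddd %zmm1, %zmm0, %zmm0 {%k1}) rather than a
; zero-masking select followed by an unmasked vpaddd, mirroring the updated
; CHECK lines above.
define <16 x i32> @masked_add_fold_sketch(<16 x i32> %x, <16 x i32> %y, i16 %m) {
  %mask = bitcast i16 %m to <16 x i1>
  %sel = select <16 x i1> %mask, <16 x i32> %y, <16 x i32> zeroinitializer
  %res = add <16 x i32> %x, %sel
  ret <16 x i32> %res
}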