Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -4842,23 +4842,23 @@ multiclass SS41I_pmovx_rm_all opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, - X86FoldableSchedWrite sched, Predicate prd> { - defm NAME : SS41I_pmovx_rrrm; + Predicate prd> { + defm NAME : SS41I_pmovx_rrrm; let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm, VEX, VEX_WIG; + VR128, VR128, WriteShuffle>, VEX, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm, VEX, VEX_L, VEX_WIG; + VR256, VR128, WriteShuffle256>, VEX, VEX_L, VEX_WIG; } multiclass SS41I_pmovx_rm opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, Predicate prd> { defm PMOVSX#NAME : SS41I_pmovx_rm_all; + MemOp, MemYOp, prd>; defm PMOVZX#NAME : SS41I_pmovx_rm_all; + MemOp, MemYOp, prd>; } defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -637,19 +637,7 @@ let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def BWWriteResGroup29 : SchedWriteRes<[BWPort01]> { let Latency = 3; @@ -1295,19 +1283,6 @@ } def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>; -def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm", - "VPMOVSXBQYrm", - "VPMOVSXBWYrm", - "VPMOVSXDQYrm", - "VPMOVSXWDYrm", - "VPMOVSXWQYrm", - "VPMOVZXWDYrm")>; - def BWWriteResGroup93 : SchedWriteRes<[BWPort01,BWPort23]> { let Latency = 8; let NumMicroOps = 2; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -904,9 +904,6 @@ "VPBLENDWYrmi", "VPERMILPDYmi", "VPERMILPSYmi", - "VPMOVSXBDYrm", - "VPMOVSXBQYrm", - "VPMOVSXWQYrm", "VPSHUFBYrm", "VPSHUFDYmi", "VPSHUFHWYmi", @@ -1379,19 +1376,7 @@ let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 9; Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -743,19 +743,7 @@ "(ADD|SUB|SUBR)_FrST0", "VPBROADCASTBrr", "VPBROADCASTWrr", - "(V?)PCMPGTQ(Y?)rr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "(V?)PCMPGTQ(Y?)rr")>; def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; @@ -1526,9 +1514,6 @@ "VPERMILPDYrm", "VPERMILPSYmi", "VPERMILPSYrm", - "VPMOVSXBDYrm", - "VPMOVSXBQYrm", - "VPMOVSXWQYrm", "VPSHUFBYrm", "VPSHUFDYmi", "VPSHUFHWYmi", Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -1362,54 +1362,6 @@ "VPMOVQDZ128rr", "VPMOVQDZ256rr", "VPMOVQDZrr", - "VPMOVSXBDYrr", - "VPMOVSXBDZ128rr", - "VPMOVSXBDZ256rr", - "VPMOVSXBDZrr", - "VPMOVSXBQYrr", - "VPMOVSXBQZ128rr", - "VPMOVSXBQZ256rr", - "VPMOVSXBQZrr", - "VPMOVSXBWYrr", - "VPMOVSXBWZ128rr", - "VPMOVSXBWZ256rr", - "VPMOVSXBWZrr", - "VPMOVSXDQYrr", - "VPMOVSXDQZ128rr", - "VPMOVSXDQZ256rr", - "VPMOVSXDQZrr", - "VPMOVSXWDYrr", - "VPMOVSXWDZ128rr", - "VPMOVSXWDZ256rr", - "VPMOVSXWDZrr", - "VPMOVSXWQYrr", - "VPMOVSXWQZ128rr", - "VPMOVSXWQZ256rr", - "VPMOVSXWQZrr", - "VPMOVZXBDYrr", - "VPMOVZXBDZ128rr", - "VPMOVZXBDZ256rr", - "VPMOVZXBDZrr", - "VPMOVZXBQYrr", - "VPMOVZXBQZ128rr", - "VPMOVZXBQZ256rr", - "VPMOVZXBQZrr", - "VPMOVZXBWYrr", - "VPMOVZXBWZ128rr", - "VPMOVZXBWZ256rr", - "VPMOVZXBWZrr", - "VPMOVZXDQYrr", - "VPMOVZXDQZ128rr", - "VPMOVZXDQZ256rr", - "VPMOVZXDQZrr", - "VPMOVZXWDYrr", - "VPMOVZXWDZ128rr", - "VPMOVZXWDZ256rr", - "VPMOVZXWDZrr", - "VPMOVZXWQYrr", - "VPMOVZXWQZ128rr", - "VPMOVZXWQZ256rr", - "VPMOVZXWQZrr", "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined. "VPTESTMBZ128rr", "VPTESTMBZ256rr", @@ -2907,9 +2859,6 @@ "VPERMILPSZ256rm(b?)", "VPERMILPSZm(b?)i", "VPERMILPSZrm(b?)", - "VPMOVSXBDYrm", - "VPMOVSXBQYrm", - "VPMOVSXWQYrm", "VPSHUFBYrm", "VPSHUFBZ256rm(b?)", "VPSHUFBZrm(b?)", Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -903,15 +903,20 @@ let NumMicroOps = 2; } def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ; +def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> { + let Latency = 8; + let NumMicroOps = 2; +} def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr", "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; -// VPMOVSX/ZX BW BD BQ DW DQ. +// VPMOVSX/ZX BW BD BQ WD WQ DQ. // y <- x. -def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>; def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { Index: test/CodeGen/X86/avx2-schedule.ll =================================================================== --- test/CodeGen/X86/avx2-schedule.ll +++ test/CodeGen/X86/avx2-schedule.ll @@ -4173,7 +4173,7 @@ ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> @@ -4373,7 +4373,7 @@ ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = sext <8 x i16> %a0 to <8 x i32> @@ -4422,7 +4422,7 @@ ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> @@ -4473,7 +4473,7 @@ ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> @@ -4673,7 +4673,7 @@ ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = zext <8 x i16> %a0 to <8 x i32> @@ -4722,7 +4722,7 @@ ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] +; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> Index: test/tools/llvm-mca/X86/Znver1/resources-avx2.s =================================================================== --- test/tools/llvm-mca/X86/Znver1/resources-avx2.s +++ test/tools/llvm-mca/X86/Znver1/resources-avx2.s @@ -620,30 +620,30 @@ # CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx -# CHECK-NEXT: 1 1 0.25 vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.25 vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmulhrsw %ymm0, %ymm1, %ymm2 @@ -773,7 +773,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 63.50 63.50 - - - - - 77.17 70.67 80.00 47.17 - +# CHECK-NEXT: 63.50 63.50 - - - - - 72.67 75.17 84.50 42.67 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -940,30 +940,30 @@ # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 2.00 - - vpmovmskb %ymm0, %ecx -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - vpmulhrsw %ymm0, %ymm1, %ymm2