Index: ../lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- ../lib/Target/X86/X86InstrInfo.cpp
+++ ../lib/Target/X86/X86InstrInfo.cpp
@@ -5279,6 +5279,20 @@
   return true;
 }
 
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two %k0 reads.
+/// This is used for mapping:
+///   %k4 = K_SET1
+/// to:
+///   %k4 = KXNORrr %k0, %k0
+static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
+                            const MCInstrDesc &Desc, unsigned Reg) {
+  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+  MIB->setDesc(Desc);
+  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+  return true;
+}
+
 static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
                           bool MinusOne) {
   MachineBasicBlock &MBB = *MIB->getParent();
@@ -5400,14 +5414,22 @@
   case X86::TEST8ri_NOREX:
     MI->setDesc(get(X86::TEST8ri));
     return true;
+
+  // KNL does not recognize dependency-breaking idioms for mask registers,
+  // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
+  // Using %k0 as the undef input register is a performance heuristic based
+  // on the assumption that %k0 is used less frequently than the other mask
+  // registers, since it is not usable as a write mask.
+  // FIXME: A more advanced approach would be to choose the best input mask
+  // register based on context.
   case X86::KSET0B:
-  case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
-  case X86::KSET0D: return Expand2AddrUndef(MIB, get(X86::KXORDrr));
-  case X86::KSET0Q: return Expand2AddrUndef(MIB, get(X86::KXORQrr));
+  case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+  case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+  case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
   case X86::KSET1B:
-  case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
-  case X86::KSET1D: return Expand2AddrUndef(MIB, get(X86::KXNORDrr));
-  case X86::KSET1Q: return Expand2AddrUndef(MIB, get(X86::KXNORQrr));
+  case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+  case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+  case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
   case TargetOpcode::LOAD_STACK_GUARD:
     expandLoadStackGuard(MIB, *this);
     return true;
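For reference, both expansions are value-independent: for any mask value k, k ^ k == 0 (which is why KSET0 can become KXOR) and ~(k ^ k) == all-ones (which is why KSET1 can become KXNOR). Since the result never depends on the source contents, the source operand may be undef and any k-register would be legal; %k0 is preferred only for the dependency-height reason documented in the comment above. A minimal standalone C++ sketch of the two identities (illustration only, not part of the patch; the loop values are arbitrary stand-ins for unknown k-register contents):

#include <cassert>
#include <cstdint>

int main() {
  // Arbitrary stand-ins for the unknown contents of a 16-bit k-register.
  for (uint16_t K : {uint16_t{0x0000}, uint16_t{0xBEEF}, uint16_t{0xFFFF}}) {
    // KXORWrr %k, %k  -> always zero, regardless of K.
    assert(static_cast<uint16_t>(K ^ K) == 0x0000);
    // KXNORWrr %k, %k -> always all-ones, regardless of K.
    assert(static_cast<uint16_t>(~(K ^ K)) == 0xFFFF);
  }
  return 0;
}
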
Index: ../test/CodeGen/X86/avx512-gather-scatter-intrin.ll
===================================================================
--- ../test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ ../test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -240,8 +240,8 @@
 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_qps:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
 ; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
 ; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
@@ -257,7 +257,7 @@
 define void @prefetch(<8 x i64> %ind, i8* %base) {
 ; CHECK-LABEL: prefetch:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
 ; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
 ; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
@@ -279,7 +279,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -312,7 +312,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -330,7 +330,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
 ; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -348,7 +348,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -364,7 +364,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
 ; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
@@ -384,7 +384,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -420,7 +420,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -453,7 +453,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -486,7 +486,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -502,7 +502,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
 ; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
@@ -522,7 +522,7 @@
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -556,7 +556,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -572,7 +572,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
@@ -587,7 +587,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
@@ -602,7 +602,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
@@ -617,7 +617,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
@@ -631,7 +631,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -647,7 +647,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
@@ -662,7 +662,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
@@ -676,7 +676,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -691,7 +691,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -707,7 +707,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
@@ -721,7 +721,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -737,7 +737,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
@@ -752,7 +752,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
@@ -767,7 +767,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
@@ -782,7 +782,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
Index: ../test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- ../test/CodeGen/X86/avx512-mask-op.ll
+++ ../test/CodeGen/X86/avx512-mask-op.ll
@@ -323,7 +323,7 @@
 }
 
 ; SKX-LABEL: test16
-; SKX: kxnorw %k1, %k1, %k1
+; SKX: kxnorw %k0, %k0, %k1
 ; SKX: kshiftrw $15, %k1, %k1
 ; SKX: kshiftlq $5, %k1, %k1
 ; SKX: korq %k1, %k0, %k0
Index: ../test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- ../test/CodeGen/X86/masked_gather_scatter.ll
+++ ../test/CodeGen/X86/masked_gather_scatter.ll
@@ -20,7 +20,7 @@
 define <16 x float> @test1(float* %base, <16 x i32> %ind) {
 ; KNL_64-LABEL: test1:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
@@ -28,14 +28,14 @@
 ; KNL_32-LABEL: test1:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test1:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -243,8 +243,8 @@
 define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ; KNL_64-LABEL: test6:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
-; KNL_64-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
 ; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm2, %zmm0
@@ -252,9 +252,9 @@
 ;
 ; KNL_32-LABEL: test6:
 ; KNL_32:       # BB#0:
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpmovsxdq %ymm1, %zmm2
-; KNL_32-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_32-NEXT:    vpgatherqd (,%zmm2), %ymm1 {%k2}
 ; KNL_32-NEXT:    vpscatterqd %ymm0, (,%zmm2) {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
@@ -262,8 +262,8 @@
 ;
 ; SKX-LABEL: test6:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
-; SKX-NEXT:    kxnorw %k2, %k2, %k2
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k2
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
 ; SKX-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; SKX-NEXT:    vmovaps %zmm2, %zmm0
@@ -409,7 +409,7 @@
 ; KNL_64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -426,7 +426,7 @@
 ; KNL_32-NEXT:    vpbroadcastd .LCPI8_2, %ymm1
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
@@ -439,7 +439,7 @@
 ; SKX-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
@@ -471,7 +471,7 @@
 ; KNL_64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -488,7 +488,7 @@
 ; KNL_32-NEXT:    vpbroadcastd .LCPI9_2, %ymm1
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
@@ -501,7 +501,7 @@
 ; SKX-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
@@ -518,7 +518,7 @@
 ; KNL_64-LABEL: test11:
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    vpbroadcastd %esi, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -526,14 +526,14 @@
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test11:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vpbroadcastd %esi, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
 ; SKX-NEXT:    retq
 
@@ -550,7 +550,7 @@
 define <16 x float> @test12(float* %base, <16 x i32> %ind) {
 ; KNL_64-LABEL: test12:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
@@ -558,14 +558,14 @@
 ; KNL_32-LABEL: test12:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test12:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1064,7 +1064,7 @@
 ;
 ; SKX-LABEL: test24:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1133,7 +1133,7 @@
 ;
 ; SKX-LABEL: test26:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1404,8 +1404,8 @@
 define <16 x float*> @test31(<16 x float**> %ptrs) {
 ; KNL_64-LABEL: test31:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
-; KNL_64-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
 ; KNL_64-NEXT:    kshiftrw $8, %k1, %k1
 ; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
@@ -1415,15 +1415,15 @@
 ;
 ; KNL_32-LABEL: test31:
 ; KNL_32:       # BB#0:
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test31:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
-; SKX-NEXT:    kxnorw %k2, %k2, %k2
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k2
 ; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
 ; SKX-NEXT:    kshiftrw $8, %k1, %k1
 ; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
@@ -1433,7 +1433,7 @@
 ;
 ; SKX_32-LABEL: test31:
 ; SKX_32:       # BB#0:
-; SKX_32-NEXT:    kxnorw %k1, %k1, %k1
+; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
 ; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX_32-NEXT:    retl