Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -4267,15 +4267,58 @@ return 0; } -inline static bool MaskRegClassContains(unsigned Reg) { +static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } + +static bool GRRegClassContains(unsigned Reg) { + return X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg) || + X86::GR16RegClass.contains(Reg) || + X86::GR8RegClass.contains(Reg); +} +static +unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) { + DestReg = getX86SubSuperRegister(DestReg, MVT::i32); + return X86::KMOVBrk; + } + if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) { + SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); + return X86::KMOVBkr; + } + return 0; +} + +static +unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg)) + return X86::KMOVQkk; + if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg)) + return X86::KMOVDrk; + if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg)) + return X86::KMOVQrk; + if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + return X86::KMOVDkr; + if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg)) + return X86::KMOVQkr; + return 0; +} + static -unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { +unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg, + const X86Subtarget &Subtarget) +{ + if (Subtarget.hasDQI()) + if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg)) + return Opc; + if (Subtarget.hasBWI()) + if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg)) + return Opc; if (X86::VR128XRegClass.contains(DestReg, SrcReg) || X86::VR256XRegClass.contains(DestReg, SrcReg) || X86::VR512RegClass.contains(DestReg, SrcReg)) { @@ -4283,20 +4326,13 @@ SrcReg = get512BitSuperRegister(SrcReg); return X86::VMOVAPSZrr; } - if (MaskRegClassContains(DestReg) && - MaskRegClassContains(SrcReg)) + if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) return X86::KMOVWkk; - if (MaskRegClassContains(DestReg) && - (X86::GR32RegClass.contains(SrcReg) || - X86::GR16RegClass.contains(SrcReg) || - X86::GR8RegClass.contains(SrcReg))) { + if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) { SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); return X86::KMOVWkr; } - if ((X86::GR32RegClass.contains(DestReg) || - X86::GR16RegClass.contains(DestReg) || - X86::GR8RegClass.contains(DestReg)) && - MaskRegClassContains(SrcReg)) { + if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) { DestReg = getX86SubSuperRegister(DestReg, MVT::i32); return X86::KMOVWrk; } @@ -4332,7 +4368,7 @@ else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else if (HasAVX512) - Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); + Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget); else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Index: llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -14,7 +14,7 @@ ; CHECK-LABEL: gather_mask_dps: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} @@ -29,7 +29,7 @@ ; CHECK-LABEL: gather_mask_dpd: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 ; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} @@ -44,7 +44,7 @@ ; CHECK-LABEL: gather_mask_qps: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2} ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} @@ -59,7 +59,7 @@ ; CHECK-LABEL: gather_mask_qpd: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} @@ -86,7 +86,7 @@ ; CHECK-LABEL: gather_mask_dd: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} @@ -101,7 +101,7 @@ ; CHECK-LABEL: gather_mask_qd: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2} ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} @@ -116,7 +116,7 @@ ; CHECK-LABEL: gather_mask_qq: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} @@ -131,7 +131,7 @@ ; CHECK-LABEL: gather_mask_dq: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2} ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 ; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} @@ -400,7 +400,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm2 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1} ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 @@ -538,7 +538,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm2 -; CHECK-NEXT: kmovw %k1, %k2 +; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2} ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1} ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -322,7 +322,7 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 @@ -342,7 +342,7 @@ define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1654,7 +1654,7 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0