Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -1925,7 +1925,12 @@
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     unsigned Size = TRI->getSpillSize(*RC);
     unsigned Align = TRI->getSpillAlignment(*RC);
     // ensure alignment
@@ -1992,9 +1997,15 @@
     unsigned Reg = CSI[i-1].getReg();
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
+
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
 
     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                             TRI);
@@ -2068,7 +2079,12 @@
         X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
   }
 
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -6919,8 +6919,10 @@
         (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
     if (X86::RFP32RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp32m : X86::ST_Fp32m;
-    if (X86::VK32RegClass.hasSubClassEq(RC))
+    if (X86::VK32RegClass.hasSubClassEq(RC)) {
+      assert(STI.hasBWI() && "KMOVD requires BWI");
       return load ? X86::KMOVDkm : X86::KMOVDmk;
+    }
     llvm_unreachable("Unknown 4-byte regclass");
   case 8:
     if (X86::GR64RegClass.hasSubClassEq(RC))
@@ -6933,8 +6935,10 @@
       return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
     if (X86::RFP64RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp64m : X86::ST_Fp64m;
-    if (X86::VK64RegClass.hasSubClassEq(RC))
+    if (X86::VK64RegClass.hasSubClassEq(RC)) {
+      assert(STI.hasBWI() && "KMOVQ requires BWI");
       return load ? X86::KMOVQkm : X86::KMOVQmk;
+    }
     llvm_unreachable("Unknown 8-byte regclass");
   case 10:
     assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
Index: test/CodeGen/X86/avx512-intel-ocl.ll
===================================================================
--- test/CodeGen/X86/avx512-intel-ocl.ll
+++ test/CodeGen/X86/avx512-intel-ocl.ll
@@ -69,10 +69,10 @@
 ; WIN64: vmovaps {{.*(%rbp).*}}, %zmm21 # 64-byte Reload
 
 ; X64-LABEL: test_prolog_epilog
-; X64: kmovq %k7, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k6, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k5, {{.*}}(%rsp) ## 8-byte Spill
-; X64: kmovq %k4, {{.*}}(%rsp) ## 8-byte Spill
+; X64: kmovw %k7, {{.*}}(%rsp) ## 2-byte Spill
+; X64: kmovw %k6, {{.*}}(%rsp) ## 2-byte Spill
+; X64: kmovw %k5, {{.*}}(%rsp) ## 2-byte Spill
+; X64: kmovw %k4, {{.*}}(%rsp) ## 2-byte Spill
 ; X64: vmovups %zmm31, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: vmovups %zmm16, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: call
Index: test/CodeGen/X86/x86-interrupt_cc.ll
===================================================================
--- test/CodeGen/X86/x86-interrupt_cc.ll
+++ test/CodeGen/X86/x86-interrupt_cc.ll
@@ -3,21 +3,21 @@
 
 ; Make sure we spill the high numbered zmm registers and K registers with the right encoding.
 ; CHECK-LABEL: foo
-; CHECK: kmovq %k7, {{.+}}
-; CHECK64: encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
-; CHECK32: encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK: kmovw %k7, {{.+}}
+; CHECK64: encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x08,0x00,0x00]
+; CHECK32: encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00]
 ; k6 is used as an anchor for the previous regexp.
-; CHECK-NEXT: kmovq %k6
+; CHECK-NEXT: kmovw %k6
 
 ; CHECK64: movups %zmm31, {{.+}}
-; CHECK64: encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64: encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
 ; zmm30 is used as an anchor for the previous regexp.
 ; CHECK64-NEXT: movups %zmm30
 
 ; CHECK32-NOT: zmm31
 ; CHECK32-NOT: zmm8
 ; CHECK32: movups %zmm7, {{.+}}
-; CHECK32: encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
+; CHECK32: encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
 ; zmm6 is used as an anchor for the previous regexp.
 ; CHECK32-NEXT: movups %zmm6
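
For reference, the spill-class selection in isolation: a sketch recapping the hunks above, not additional patch content, with Reg, STI and TRI assumed to be in scope as they are in X86FrameLowering.

  // Pick the type used to look up the register class for a mask (k) register.
  // Without AVX512BW only 16-bit mask moves are legal, so v16i1 resolves the
  // lookup to VK16 and the spill becomes a 2-byte KMOVW; with BWI, v64i1
  // resolves it to VK64 and the spill is an 8-byte KMOVQ (see the test
  // updates above for the resulting spill sizes).
  MVT VT = MVT::Other;
  if (X86::VK16RegClass.contains(Reg))
    VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

The asserts added to getLoadStoreRegOpcode guard the other direction: if a VK32/VK64 class ever reaches the spill code on a target without BWI, the build fails loudly instead of emitting an unencodable KMOVD/KMOVQ.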