Index: lib/Target/X86/X86EvexToVex.cpp
===================================================================
--- lib/Target/X86/X86EvexToVex.cpp
+++ lib/Target/X86/X86EvexToVex.cpp
@@ -132,6 +132,37 @@
   EvexToVexTable[EvexOp] = VexOp;
 }
 
+static bool usesExtendedRegister(const MachineInstr &MI) {
+  auto isHiRegIdx = [](unsigned Reg) {
+    // Check for XMM register with indexes between 16 - 31.
+    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
+      return true;
+
+    // Check for YMM register with indexes between 16 - 31.
+    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
+      return true;
+
+    return false;
+  };
+
+  // Check that operands are not ZMM regs or
+  // XMM/YMM regs with hi indexes between 16 - 31.
+  for (const MachineOperand &MO : MI.explicit_operands()) {
+    if (!MO.isReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+
+    assert (!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31));
+
+    if (isHiRegIdx(Reg))
+      return true;
+  }
+
+  return false;
+}
+
+
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
 bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
@@ -185,34 +216,37 @@
       NewOpc = It->second;
   }
 
-  if (!NewOpc)
-    return false;
-
-  auto isHiRegIdx = [](unsigned Reg) {
-    // Check for XMM register with indexes between 16 - 31.
-    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
-      return true;
+  // Special case to turn VALIGND/Q into PALIGNR if we didn't need the extended
+  // register class.
+  if (!NewOpc && (MI.getOpcode() == X86::VALIGNDZ128rri ||
+                  MI.getOpcode() == X86::VALIGNDZ128rmi ||
+                  MI.getOpcode() == X86::VALIGNQZ128rri ||
+                  MI.getOpcode() == X86::VALIGNQZ128rmi)) {
+    if (usesExtendedRegister(MI))
+      return false;
 
-    // Check for YMM register with indexes between 16 - 31.
-    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
-      return true;
+    if (MI.getOpcode() == X86::VALIGNDZ128rri ||
+        MI.getOpcode() == X86::VALIGNQZ128rri)
+      NewOpc = X86::VPALIGNRrri;
+    else
+      NewOpc = X86::VPALIGNRrmi;
+
+    unsigned Scale = (MI.getOpcode() == X86::VALIGNQZ128rri ||
+                      MI.getOpcode() == X86::VALIGNQZ128rmi) ? 8 : 4;
+
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    Imm.setImm(Imm.getImm() * Scale);
+    const MCInstrDesc &MCID = TII->get(NewOpc);
+    MI.setDesc(MCID);
+    MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
+    return true;
+  }
 
+  if (!NewOpc)
     return false;
-  };
 
-  // Check that operands are not ZMM regs or
-  // XMM/YMM regs with hi indexes between 16 - 31.
-  for (const MachineOperand &MO : MI.explicit_operands()) {
-    if (!MO.isReg())
-      continue;
-
-    unsigned Reg = MO.getReg();
-
-    assert (!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31));
-
-    if (isHiRegIdx(Reg))
-      return false;
-  }
+  if (usesExtendedRegister(MI))
+    return false;
 
   const MCInstrDesc &MCID = TII->get(NewOpc);
   MI.setDesc(MCID);
Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
===================================================================
--- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -4689,8 +4689,8 @@
 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignd $2, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0x7d,0x08,0x03,0xd9,0x02]
-; CHECK-NEXT:    ## xmm3 = xmm1[2,3],xmm0[0,1]
+; CHECK-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT:    ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    valignd $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
 ; CHECK-NEXT:    ## xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
@@ -4730,8 +4730,8 @@
 define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignq $1, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0xfd,0x08,0x03,0xd9,0x01]
-; CHECK-NEXT:    ## xmm3 = xmm1[1],xmm0[0]
+; CHECK-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT:    ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    valignq $1, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
 ; CHECK-NEXT:    ## xmm2 {%k1} = xmm1[1],xmm0[0]
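Note on the immediate rewrite: VALIGND/VALIGNQ count their rotate in 32-/64-bit elements, while VPALIGNR counts it in bytes, which is why the special case above multiplies the immediate by Scale (4 for dwords, 8 for qwords). The following is a minimal standalone C++ sketch of that equivalence, not part of the patch; it models the two 128-bit shuffles on plain byte arrays, uses no LLVM APIs, and the names (V128, valignd, vpalignr) are purely illustrative.

// Standalone demo: a VALIGND shift of Imm dword elements equals a VPALIGNR
// shift of Imm * 4 bytes on the same {Hi:Lo} concatenation.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

using V128 = std::array<uint8_t, 16>;

// VALIGND-style: concatenate {Hi:Lo} as eight 32-bit elements (Lo supplies
// elements 0-3, Hi elements 4-7), shift right by Imm elements, keep the low 4.
static V128 valignd(const V128 &Hi, const V128 &Lo, unsigned Imm) {
  assert(Imm <= 4 && "demo only handles in-range shifts");
  uint32_t Cat[8];
  std::memcpy(Cat, Lo.data(), 16);
  std::memcpy(Cat + 4, Hi.data(), 16);
  V128 Res;
  std::memcpy(Res.data(), Cat + Imm, 16);
  return Res;
}

// VPALIGNR-style: same concatenation, but the shift amount is in bytes.
static V128 vpalignr(const V128 &Hi, const V128 &Lo, unsigned Imm) {
  assert(Imm <= 16 && "demo only handles in-range shifts");
  uint8_t Cat[32];
  std::memcpy(Cat, Lo.data(), 16);
  std::memcpy(Cat + 16, Hi.data(), 16);
  V128 Res;
  std::memcpy(Res.data(), Cat + Imm, 16);
  return Res;
}

int main() {
  V128 A, B;
  for (unsigned i = 0; i != 16; ++i) {
    A[i] = static_cast<uint8_t>(i);        // stands in for xmm0 (first source)
    B[i] = static_cast<uint8_t>(0x80 + i); // stands in for xmm1 (second source)
  }
  // valignd $Imm  ==  vpalignr $(Imm * 4); the pass applies the same scaling,
  // with 8 instead of 4 for VALIGNQ.
  for (unsigned Imm = 0; Imm != 4; ++Imm)
    assert(valignd(A, B, Imm) == vpalignr(A, B, Imm * 4));
  return 0;
}

Only the unmasked VALIGN*Z128rri/rmi opcodes are listed in the special case, so the masked valignd/valignq checks in the test keep their EVEX encoding; only the unmasked lines are rewritten to vpalignr with the scaled immediate.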