Index: lib/Target/X86/InstPrinter/X86InstComments.cpp =================================================================== --- lib/Target/X86/InstPrinter/X86InstComments.cpp +++ lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -16,7 +16,6 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" @@ -173,37 +172,10 @@ llvm_unreachable("Unknown vector reg!"); } -static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT, - unsigned OperandIndex) { +static unsigned getRegOperandNumElts(const MCInst *MI, unsigned ScalarSize, + unsigned OperandIndex) { unsigned OpReg = MI->getOperand(OperandIndex).getReg(); - return MVT::getVectorVT(ScalarVT, - getVectorRegSize(OpReg)/ScalarVT.getSizeInBits()); -} - -/// \brief Extracts the dst type for a given zero extension instruction. -static MVT getZeroExtensionResultType(const MCInst *MI) { - switch (MI->getOpcode()) { - default: - llvm_unreachable("Unknown zero extension instruction"); - // zero extension to i16 - CASE_PMOVZX(PMOVZXBW, m) - CASE_PMOVZX(PMOVZXBW, r) - return getRegOperandVectorVT(MI, MVT::i16, 0); - // zero extension to i32 - CASE_PMOVZX(PMOVZXBD, m) - CASE_PMOVZX(PMOVZXBD, r) - CASE_PMOVZX(PMOVZXWD, m) - CASE_PMOVZX(PMOVZXWD, r) - return getRegOperandVectorVT(MI, MVT::i32, 0); - // zero extension to i64 - CASE_PMOVZX(PMOVZXBQ, m) - CASE_PMOVZX(PMOVZXBQ, r) - CASE_PMOVZX(PMOVZXWQ, m) - CASE_PMOVZX(PMOVZXWQ, r) - CASE_PMOVZX(PMOVZXDQ, m) - CASE_PMOVZX(PMOVZXDQ, r) - return getRegOperandVectorVT(MI, MVT::i64, 0); - } + return getVectorRegSize(OpReg) / ScalarSize; } /// Wraps the destination register name with AVX512 mask/maskz filtering. 
@@ -262,7 +234,7 @@ case X86::VBLENDPDrmi: case X86::VBLENDPDYrmi: if (MI->getOperand(NumOperands - 1).isImm()) - DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f64, 0), + DecodeBLENDMask(getRegOperandNumElts(MI, 64, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -278,7 +250,7 @@ case X86::VBLENDPSrmi: case X86::VBLENDPSYrmi: if (MI->getOperand(NumOperands - 1).isImm()) - DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f32, 0), + DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -294,7 +266,7 @@ case X86::VPBLENDWrmi: case X86::VPBLENDWYrmi: if (MI->getOperand(NumOperands - 1).isImm()) - DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i16, 0), + DecodeBLENDMask(getRegOperandNumElts(MI, 16, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -308,7 +280,7 @@ case X86::VPBLENDDrmi: case X86::VPBLENDDYrmi: if (MI->getOperand(NumOperands - 1).isImm()) - DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i32, 0), + DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -353,7 +325,7 @@ case X86::VMOVHPDZ128rm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeInsertElementMask(MVT::v2f64, 1, 1, ShuffleMask); + DecodeInsertElementMask(2, 1, 1, ShuffleMask); break; case X86::MOVHPSrm: @@ -361,7 +333,7 @@ case X86::VMOVHPSZ128rm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeInsertElementMask(MVT::v4f32, 2, 2, ShuffleMask); + DecodeInsertElementMask(4, 2, 2, ShuffleMask); break; case X86::MOVLPDrm: @@ -369,7 +341,7 @@ case X86::VMOVLPDZ128rm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = 
getRegName(MI->getOperand(0).getReg()); - DecodeInsertElementMask(MVT::v2f64, 0, 1, ShuffleMask); + DecodeInsertElementMask(2, 0, 1, ShuffleMask); break; case X86::MOVLPSrm: @@ -377,7 +349,7 @@ case X86::VMOVLPSZ128rm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeInsertElementMask(MVT::v4f32, 0, 2, ShuffleMask); + DecodeInsertElementMask(4, 0, 2, ShuffleMask); break; CASE_MOVDUP(MOVSLDUP, r) @@ -386,7 +358,7 @@ CASE_MOVDUP(MOVSLDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); + DecodeMOVSLDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask); break; CASE_MOVDUP(MOVSHDUP, r) @@ -395,7 +367,7 @@ CASE_MOVDUP(MOVSHDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); + DecodeMOVSHDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask); break; CASE_MOVDUP(MOVDDUP, r) @@ -404,7 +376,7 @@ CASE_MOVDUP(MOVDDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); + DecodeMOVDDUPMask(getRegOperandNumElts(MI, 64, 0), ShuffleMask); break; case X86::PSLLDQri: @@ -420,7 +392,7 @@ case X86::VPSLLDQZrm: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), + DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -438,7 +410,7 @@ case X86::VPSRLDQZrm: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), + DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -452,7 +424,7 @@ Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); DestName = 
getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePALIGNRMask(getRegOperandVectorVT(MI, MVT::i8, 0), + DecodePALIGNRMask(getRegOperandNumElts(MI, 8, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -470,7 +442,7 @@ Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodeVALIGNMask(getRegOperandVectorVT(MI, MVT::i64, 0), + DecodeVALIGNMask(getRegOperandNumElts(MI, 64, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -488,7 +460,7 @@ Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodeVALIGNMask(getRegOperandVectorVT(MI, MVT::i32, 0), + DecodeVALIGNMask(getRegOperandNumElts(MI, 32, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -500,7 +472,7 @@ CASE_SHUF(PSHUFD, mi) DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::i32, 0), + DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -512,7 +484,7 @@ CASE_SHUF(PSHUFHW, mi) DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFHWMask(getRegOperandVectorVT(MI, MVT::i16, 0), + DecodePSHUFHWMask(getRegOperandNumElts(MI, 16, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -524,7 +496,7 @@ CASE_SHUF(PSHUFLW, mi) DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFLWMask(getRegOperandVectorVT(MI, MVT::i16, 0), + DecodePSHUFLWMask(getRegOperandNumElts(MI, 16, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -536,8 +508,7 @@ case X86::MMX_PSHUFWmi: DestName = 
getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFMask(MVT::v4i16, - MI->getOperand(NumOperands - 1).getImm(), + DecodePSHUFMask(4, 16, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); break; @@ -547,7 +518,7 @@ case X86::PSWAPDrm: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSWAPMask(MVT::v2i32, ShuffleMask); + DecodePSWAPMask(2, ShuffleMask); break; CASE_UNPCK(PUNPCKHBW, r) @@ -560,7 +531,7 @@ case X86::MMX_PUNPCKHBWirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask); break; CASE_UNPCK(PUNPCKHWD, r) @@ -573,7 +544,7 @@ case X86::MMX_PUNPCKHWDirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask); break; CASE_UNPCK(PUNPCKHDQ, r) @@ -586,7 +557,7 @@ case X86::MMX_PUNPCKHDQirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask); break; CASE_UNPCK(PUNPCKHQDQ, r) @@ -597,7 +568,7 @@ CASE_UNPCK(PUNPCKHQDQ, m) Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask); break; CASE_UNPCK(PUNPCKLBW, r) @@ -610,7 +581,7 @@ case X86::MMX_PUNPCKLBWirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = 
getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask); break; CASE_UNPCK(PUNPCKLWD, r) @@ -623,7 +594,7 @@ case X86::MMX_PUNPCKLWDirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask); break; CASE_UNPCK(PUNPCKLDQ, r) @@ -636,7 +607,7 @@ case X86::MMX_PUNPCKLDQirm: Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask); break; CASE_UNPCK(PUNPCKLQDQ, r) @@ -647,7 +618,7 @@ CASE_UNPCK(PUNPCKLQDQ, m) Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask); break; CASE_SHUF(SHUFPD, rri) @@ -657,9 +628,8 @@ CASE_SHUF(SHUFPD, rmi) if (MI->getOperand(NumOperands - 1).isImm()) - DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0), - MI->getOperand(NumOperands - 1).getImm(), - ShuffleMask); + DecodeSHUFPMask(getRegOperandNumElts(MI, 64, 0), 64, + MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -671,7 +641,7 @@ CASE_SHUF(SHUFPS, rmi) if (MI->getOperand(NumOperands - 1).isImm()) - DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0), + DecodeSHUFPMask(getRegOperandNumElts(MI, 32, 0), 32, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = 
getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); @@ -684,7 +654,7 @@ LLVM_FALLTHROUGH; CASE_VSHUF(64X2, m) - decodeVSHUF64x2FamilyMask(getRegOperandVectorVT(MI, MVT::i64, 0), + decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 64, 0), 64, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); @@ -697,7 +667,7 @@ LLVM_FALLTHROUGH; CASE_VSHUF(32X4, m) - decodeVSHUF64x2FamilyMask(getRegOperandVectorVT(MI, MVT::i32, 0), + decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 32, 0), 32, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg()); @@ -710,7 +680,7 @@ LLVM_FALLTHROUGH; CASE_UNPCK(UNPCKLPD, m) - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -721,7 +691,7 @@ LLVM_FALLTHROUGH; CASE_UNPCK(UNPCKLPS, m) - DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); + DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -732,7 +702,7 @@ LLVM_FALLTHROUGH; CASE_UNPCK(UNPCKHPD, m) - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask); Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -743,7 +713,7 @@ LLVM_FALLTHROUGH; CASE_UNPCK(UNPCKHPS, m) - DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); + DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask); Src1Name = 
getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -754,7 +724,7 @@ CASE_VPERMILPI(PERMILPS, m) if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0), + DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); @@ -766,7 +736,7 @@ CASE_VPERMILPI(PERMILPD, m) if (MI->getOperand(NumOperands - 1).isImm()) - DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0), + DecodePSHUFMask(getRegOperandNumElts(MI, 64, 0), 64, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); @@ -781,8 +751,7 @@ case X86::VPERM2I128rm: // For instruction comments purpose, assume the 256-bit vector is v4i64. if (MI->getOperand(NumOperands - 1).isImm()) - DecodeVPERM2X128Mask(MVT::v4i64, - MI->getOperand(NumOperands - 1).getImm(), + DecodeVPERM2X128Mask(4, MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -794,7 +763,7 @@ CASE_VPERM(PERMPD, m) if (MI->getOperand(NumOperands - 1).isImm()) - DecodeVPERMMask(getRegOperandVectorVT(MI, MVT::f64, 0), + DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); @@ -806,7 +775,7 @@ CASE_VPERM(PERMQ, m) if (MI->getOperand(NumOperands - 1).isImm()) - DecodeVPERMMask(getRegOperandVectorVT(MI, MVT::i64, 0), + DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0), MI->getOperand(NumOperands - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); @@ -822,7 +791,7 @@ case X86::MOVSDrm: case X86::VMOVSDrm: case X86::VMOVSDZrm: - DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask); + DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask); DestName = 
getRegName(MI->getOperand(0).getReg()); break; @@ -836,7 +805,7 @@ case X86::MOVSSrm: case X86::VMOVSSrm: case X86::VMOVSSZrm: - DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask); + DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -852,23 +821,22 @@ case X86::MOVQI2PQIrm: case X86::VMOVQI2PQIrm: case X86::VMOVQI2PQIZrm: - DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask); + DecodeZeroMoveLowMask(2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::MOVDI2PDIrm: case X86::VMOVDI2PDIrm: case X86::VMOVDI2PDIZrm: - DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask); + DecodeZeroMoveLowMask(4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::EXTRQI: if (MI->getOperand(2).isImm() && MI->getOperand(3).isImm()) - DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(), - MI->getOperand(3).getImm(), - ShuffleMask); + DecodeEXTRQIMask(16, 8, MI->getOperand(2).getImm(), + MI->getOperand(3).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -877,9 +845,8 @@ case X86::INSERTQI: if (MI->getOperand(3).isImm() && MI->getOperand(4).isImm()) - DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(), - MI->getOperand(4).getImm(), - ShuffleMask); + DecodeINSERTQIMask(16, 8, MI->getOperand(3).getImm(), + MI->getOperand(4).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -890,39 +857,39 @@ case X86::VBROADCASTI128: CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm) CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm) - DecodeSubVectorBroadcast(MVT::v4f64, MVT::v2f64, ShuffleMask); + DecodeSubVectorBroadcast(4, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm) CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm) - 
DecodeSubVectorBroadcast(MVT::v8f64, MVT::v2f64, ShuffleMask); + DecodeSubVectorBroadcast(8, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm) CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm) - DecodeSubVectorBroadcast(MVT::v8f64, MVT::v4f64, ShuffleMask); + DecodeSubVectorBroadcast(8, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z256, rm) CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z256, rm) - DecodeSubVectorBroadcast(MVT::v8f32, MVT::v4f32, ShuffleMask); + DecodeSubVectorBroadcast(8, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm) CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm) - DecodeSubVectorBroadcast(MVT::v16f32, MVT::v4f32, ShuffleMask); + DecodeSubVectorBroadcast(16, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm) CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm) - DecodeSubVectorBroadcast(MVT::v16f32, MVT::v8f32, ShuffleMask); + DecodeSubVectorBroadcast(16, 8, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m) - DecodeSubVectorBroadcast(MVT::v4f32, MVT::v2f32, ShuffleMask); + DecodeSubVectorBroadcast(4, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r) @@ -931,7 +898,7 @@ LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m) - DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask); + DecodeSubVectorBroadcast(8, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r) @@ -940,40 +907,58 @@ 
LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m) - DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask); + DecodeSubVectorBroadcast(16, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_PMOVZX(PMOVZXBW, r) - CASE_PMOVZX(PMOVZXBD, r) - CASE_PMOVZX(PMOVZXBQ, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBW, m) + DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + + CASE_PMOVZX(PMOVZXBD, r) + Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBD, m) + DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + + CASE_PMOVZX(PMOVZXBQ, r) + Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXBQ, m) - DecodeZeroExtendMask(MVT::i8, getZeroExtensionResultType(MI), ShuffleMask); + DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_PMOVZX(PMOVZXWD, r) - CASE_PMOVZX(PMOVZXWQ, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXWD, m) + DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + + CASE_PMOVZX(PMOVZXWQ, r) + Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXWQ, m) - DecodeZeroExtendMask(MVT::i16, getZeroExtensionResultType(MI), ShuffleMask); + DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; CASE_PMOVZX(PMOVZXDQ, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); 
LLVM_FALLTHROUGH; CASE_PMOVZX(PMOVZXDQ, m) - DecodeZeroExtendMask(MVT::i32, getZeroExtensionResultType(MI), ShuffleMask); + DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; } Index: lib/Target/X86/Utils/X86ShuffleDecode.h =================================================================== --- lib/Target/X86/Utils/X86ShuffleDecode.h +++ lib/Target/X86/Utils/X86ShuffleDecode.h @@ -23,7 +23,6 @@ namespace llvm { template class ArrayRef; -class MVT; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; @@ -32,7 +31,7 @@ // Insert the bottom Len elements from a second source into a vector starting at // element Idx. -void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len, +void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, SmallVectorImpl &ShuffleMask); /// Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask. @@ -43,58 +42,68 @@ /// i.e. <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); -void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask); -void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask); -void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask); -void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); -void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); -void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); -void DecodeVALIGNMask(MVT VT, 
unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for pshufhw. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for pshuflw. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFLWMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decodes a PSWAPD 3DNow! instruction. -void DecodePSWAPMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for shufp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
-void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl &ShuffleMask); /// Decodes a broadcast of the first element of a vector. -void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl &ShuffleMask); +void DecodeVectorBroadcast(unsigned NumElts, SmallVectorImpl &ShuffleMask); /// Decodes a broadcast of a subvector to a larger vector type. -void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, +void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, SmallVectorImpl &ShuffleMask); /// Decode a PSHUFB mask from a raw array of constants such as from @@ -103,18 +112,20 @@ SmallVectorImpl &ShuffleMask); /// Decode a BLEND immediate mask into a shuffle mask. -void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeBLENDMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); -void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, +void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask); /// Decode a shuffle packed values at 128-bit granularity /// immediate mask into a shuffle mask. -void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, - SmallVectorImpl &ShuffleMask); +void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, + unsigned Imm, SmallVectorImpl &ShuffleMask); /// Decodes the shuffle masks for VPERMQ/VPERMPD. -void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeVPERMMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask); /// Decode a VPPERM mask from a raw array of constants such as from /// BUILD_VECTOR. 
@@ -124,30 +135,33 @@ SmallVectorImpl &ShuffleMask); /// Decode a zero extension instruction as a shuffle mask. -void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, +void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, + unsigned NumDstElts, SmallVectorImpl &ShuffleMask); /// Decode a move lower and zero upper instruction as a shuffle mask. -void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl &ShuffleMask); +void DecodeZeroMoveLowMask(unsigned NumElts, SmallVectorImpl &ShuffleMask); /// Decode a scalar float move instruction as a shuffle mask. -void DecodeScalarMoveMask(MVT VT, bool IsLoad, +void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, SmallVectorImpl &ShuffleMask); /// Decode a SSE4A EXTRQ instruction as a shuffle mask. -void DecodeEXTRQIMask(MVT VT, int Len, int Idx, +void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl &ShuffleMask); /// Decode a SSE4A INSERTQ instruction as a shuffle mask. -void DecodeINSERTQIMask(MVT VT, int Len, int Idx, +void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl &ShuffleMask); /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. -void DecodeVPERMILPMask(MVT VT, ArrayRef RawMask, +void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, + ArrayRef RawMask, SmallVectorImpl &ShuffleMask); /// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants. -void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef RawMask, +void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, + ArrayRef RawMask, SmallVectorImpl &ShuffleMask); /// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. 
Index: lib/Target/X86/Utils/X86ShuffleDecode.cpp =================================================================== --- lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -14,7 +14,6 @@ #include "X86ShuffleDecode.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding @@ -45,9 +44,8 @@ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; } -void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len, +void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); assert((Idx + Len) <= NumElts && "Insertion out of range"); for (unsigned i = 0; i != NumElts; ++i) @@ -74,41 +72,31 @@ ShuffleMask.push_back(NElts + i); } -void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); +void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask) { for (int i = 0, e = NumElts / 2; i < e; ++i) { ShuffleMask.push_back(2 * i); ShuffleMask.push_back(2 * i); } } -void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); +void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask) { for (int i = 0, e = NumElts / 2; i < e; ++i) { ShuffleMask.push_back(2 * i + 1); ShuffleMask.push_back(2 * i + 1); } } -void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned VectorSizeInBits = VT.getSizeInBits(); - unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned NumLanes = VectorSizeInBits / 128; - unsigned NumLaneElts = NumElts / NumLanes; - unsigned NumLaneSubElts = 64 / ScalarSizeInBits; +void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask) { + const unsigned NumLaneElts = 2; for (unsigned l = 0; l < NumElts; l += 
NumLaneElts) - for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts) - for (unsigned s = 0; s != NumLaneSubElts; s++) - ShuffleMask.push_back(l + s); + for (unsigned i = 0; i < NumLaneElts; ++i) + ShuffleMask.push_back(l); } -void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned VectorSizeInBits = VT.getSizeInBits(); - unsigned NumElts = VectorSizeInBits / 8; - unsigned NumLanes = VectorSizeInBits / 128; - unsigned NumLaneElts = NumElts / NumLanes; +void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + const unsigned NumLaneElts = 16; for (unsigned l = 0; l < NumElts; l += NumLaneElts) for (unsigned i = 0; i < NumLaneElts; ++i) { @@ -118,11 +106,9 @@ } } -void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned VectorSizeInBits = VT.getSizeInBits(); - unsigned NumElts = VectorSizeInBits / 8; - unsigned NumLanes = VectorSizeInBits / 128; - unsigned NumLaneElts = NumElts / NumLanes; +void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + const unsigned NumLaneElts = 16; for (unsigned l = 0; l < NumElts; l += NumLaneElts) for (unsigned i = 0; i < NumLaneElts; ++i) { @@ -133,41 +119,36 @@ } } -void DecodePALIGNRMask(MVT VT, unsigned Imm, +void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8); - - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumLaneElts = NumElts / NumLanes; + const unsigned NumLaneElts = 16; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { - unsigned Base = i + Offset; - // if i+offset is out of this lane then we actually need the other source + unsigned Base = i + Imm; + // if i+imm is out of this lane then we actually need the other source if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 
ShuffleMask.push_back(Base + l); } } } -void DecodeVALIGNMask(MVT VT, unsigned Imm, +void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask) { - int NumElts = VT.getVectorNumElements(); // Not all bits of the immediate are used so mask it. assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2"); Imm = Imm & (NumElts - 1); - for (int i = 0; i != NumElts; ++i) + for (unsigned i = 0; i != NumElts; ++i) ShuffleMask.push_back(i + Imm); } /// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - - unsigned NumLanes = VT.getSizeInBits() / 128; +void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned Size = NumElts * ScalarBits; + unsigned NumLanes = Size / 128; if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; @@ -181,10 +162,8 @@ } } -void DecodePSHUFHWMask(MVT VT, unsigned Imm, +void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - for (unsigned l = 0; l != NumElts; l += 8) { unsigned NewImm = Imm; for (unsigned i = 0, e = 4; i != e; ++i) { @@ -197,10 +176,8 @@ } } -void DecodePSHUFLWMask(MVT VT, unsigned Imm, +void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - for (unsigned l = 0; l != NumElts; l += 8) { unsigned NewImm = Imm; for (unsigned i = 0, e = 4; i != e; ++i) { @@ -213,8 +190,7 @@ } } -void DecodePSWAPMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); +void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl &ShuffleMask) { unsigned NumHalfElts = NumElts / 2; for 
(unsigned l = 0; l != NumHalfElts; ++l) @@ -226,11 +202,9 @@ /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumLaneElts = NumElts / NumLanes; +void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, + unsigned Imm, SmallVectorImpl &ShuffleMask) { + unsigned NumLaneElts = 128 / ScalarBits; unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { @@ -248,12 +222,11 @@ /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - +void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl &ShuffleMask) { // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate // independently on 128-bit lanes. - unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLanes = (NumElts * ScalarBits) / 128; if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; @@ -268,12 +241,11 @@ /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - +void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, + SmallVectorImpl &ShuffleMask) { // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate // independently on 128-bit lanes. 
- unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLanes = (NumElts * ScalarBits) / 128; if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; @@ -286,31 +258,29 @@ } /// Decodes a broadcast of the first element of a vector. -void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = DstVT.getVectorNumElements(); +void DecodeVectorBroadcast(unsigned NumElts, + SmallVectorImpl &ShuffleMask) { ShuffleMask.append(NumElts, 0); } /// Decodes a broadcast of a subvector to a larger vector type. -void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, +void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, SmallVectorImpl &ShuffleMask) { - assert(SrcVT.getScalarType() == DstVT.getScalarType() && - "Non matching vector element types"); - unsigned NumElts = SrcVT.getVectorNumElements(); - unsigned Scale = DstVT.getSizeInBits() / SrcVT.getSizeInBits(); + unsigned Scale = DstNumElts / SrcNumElts; for (unsigned i = 0; i != Scale; ++i) - for (unsigned j = 0; j != NumElts; ++j) + for (unsigned j = 0; j != SrcNumElts; ++j) ShuffleMask.push_back(j); } /// \brief Decode a shuffle packed values at 128-bit granularity /// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) /// immediate mask into a shuffle mask. 
-void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, - SmallVectorImpl &ShuffleMask) { - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits(); +void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, + unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumElementsInLane = 128 / ScalarSize; + unsigned NumLanes = NumElts / NumElementsInLane; unsigned ControlBitsMask = NumLanes - 1; unsigned NumControlBits = NumLanes / 2; @@ -324,9 +294,9 @@ } } -void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, +void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, SmallVectorImpl &ShuffleMask) { - unsigned HalfSize = VT.getVectorNumElements() / 2; + unsigned HalfSize = NumElts / 2; for (unsigned l = 0; l != 2; ++l) { unsigned HalfMask = Imm >> (l * 4); @@ -358,17 +328,13 @@ } } -void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - int ElementBits = VT.getScalarSizeInBits(); - int NumElements = VT.getVectorNumElements(); - for (int i = 0; i < NumElements; ++i) { +void DecodeBLENDMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + for (unsigned i = 0; i < NumElts; ++i) { // If there are more than 8 elements in the vector, then any immediate blend - // mask applies to each 128-bit lane. There can never be more than - // 8 elements in a 128-bit lane with an immediate blend. - int Bit = NumElements > 8 ? i % (128 / ElementBits) : i; - assert(Bit < 8 && - "Immediate blends only operate over 8 elements at a time!"); - ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i); + // mask wraps around. + unsigned Bit = i % 8; + ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i); } } @@ -412,19 +378,15 @@ } /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 
-void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - assert((VT.is256BitVector() || VT.is512BitVector()) && - (VT.getScalarSizeInBits() == 64) && "Unexpected vector value type"); - unsigned NumElts = VT.getVectorNumElements(); +void DecodeVPERMMask(unsigned NumElts, unsigned Imm, + SmallVectorImpl &ShuffleMask) { for (unsigned l = 0; l != NumElts; l += 4) for (unsigned i = 0; i != 4; ++i) ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); } -void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl &Mask) { - unsigned NumDstElts = DstVT.getVectorNumElements(); - unsigned SrcScalarBits = SrcScalarVT.getSizeInBits(); - unsigned DstScalarBits = DstVT.getScalarSizeInBits(); +void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, + unsigned NumDstElts, SmallVectorImpl &Mask) { unsigned Scale = DstScalarBits / SrcScalarBits; assert(SrcScalarBits < DstScalarBits && "Expected zero extension mask to increase scalar size"); @@ -436,27 +398,24 @@ } } -void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); +void DecodeZeroMoveLowMask(unsigned NumElts, + SmallVectorImpl &ShuffleMask) { ShuffleMask.push_back(0); for (unsigned i = 1; i < NumElts; i++) ShuffleMask.push_back(SM_SentinelZero); } -void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &Mask) { +void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, + SmallVectorImpl &Mask) { // First element comes from the first element of second source. // Remaining elements: Load zero extends / Move copies from first source. - unsigned NumElts = VT.getVectorNumElements(); Mask.push_back(NumElts); for (unsigned i = 1; i < NumElts; i++) Mask.push_back(IsLoad ? 
static_cast(SM_SentinelZero) : i); } -void DecodeEXTRQIMask(MVT VT, int Len, int Idx, +void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl &ShuffleMask) { - assert(VT.is128BitVector() && "Expected 128-bit vector"); - unsigned NumElts = VT.getVectorNumElements(); - unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfElts = NumElts / 2; // Only the bottom 6 bits are valid for each immediate. @@ -492,11 +451,8 @@ ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeINSERTQIMask(MVT VT, int Len, int Idx, +void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl &ShuffleMask) { - assert(VT.is128BitVector() && "Expected 128-bit vector"); - unsigned NumElts = VT.getVectorNumElements(); - unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfElts = NumElts / 2; // Only the bottom 6 bits are valid for each immediate. @@ -535,33 +491,32 @@ ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeVPERMILPMask(MVT VT, ArrayRef RawMask, +void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, + ArrayRef RawMask, SmallVectorImpl &ShuffleMask) { - unsigned VecSize = VT.getSizeInBits(); - unsigned EltSize = VT.getScalarSizeInBits(); + unsigned VecSize = NumElts * ScalarBits; unsigned NumLanes = VecSize / 128; - unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; + unsigned NumEltsPerLane = NumElts / NumLanes; assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && "Unexpected vector size"); - assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { uint64_t M = RawMask[i]; - M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); + M = (ScalarBits == 64 ? 
((M >> 1) & 0x1) : (M & 0x3)); unsigned LaneOffset = i & ~(NumEltsPerLane - 1); ShuffleMask.push_back((int)(LaneOffset + M)); } } -void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef RawMask, +void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, + ArrayRef RawMask, SmallVectorImpl &ShuffleMask) { - unsigned VecSize = VT.getSizeInBits(); - unsigned EltSize = VT.getScalarSizeInBits(); + unsigned VecSize = NumElts * ScalarBits; unsigned NumLanes = VecSize / 128; - unsigned NumElts = VT.getVectorNumElements(); unsigned NumEltsPerLane = NumElts / NumLanes; assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); - assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); assert((NumElts == RawMask.size()) && "Unexpected mask size"); for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { @@ -584,7 +539,7 @@ } int Index = i & ~(NumEltsPerLane - 1); - if (EltSize == 64) + if (ScalarBits == 64) Index += (Selector >> 1) & 0x1; else Index += Selector & 0x3; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -5649,14 +5649,15 @@ assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodeBLENDMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodeBLENDMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::SHUFP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodeSHUFPMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodeSHUFPMask(NumElems, 
VT.getScalarSizeInBits(), + cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::INSERTPS: @@ -5672,7 +5673,8 @@ isa(N->getOperand(2))) { int BitLen = N->getConstantOperandVal(1); int BitIdx = N->getConstantOperandVal(2); - DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask); + DecodeEXTRQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx, + Mask); IsUnary = true; } break; @@ -5683,20 +5685,21 @@ isa(N->getOperand(3))) { int BitLen = N->getConstantOperandVal(2); int BitIdx = N->getConstantOperandVal(3); - DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask); + DecodeINSERTQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx, + Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); } break; case X86ISD::UNPCKH: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); - DecodeUNPCKHMask(VT, Mask); + DecodeUNPCKHMask(NumElems, VT.getScalarSizeInBits(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::UNPCKL: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); - DecodeUNPCKLMask(VT, Mask); + DecodeUNPCKLMask(NumElems, VT.getScalarSizeInBits(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVHLPS: @@ -5716,7 +5719,8 @@ assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePALIGNRMask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); Ops.push_back(N->getOperand(1)); Ops.push_back(N->getOperand(0)); @@ -5725,38 +5729,43 @@ assert(VT.getScalarType() == MVT::i8 && "Byte vector 
expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); - DecodePSLLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePSLLDQMask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = true; break; case X86ISD::VSRLDQ: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); - DecodePSRLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePSRLDQMask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = true; break; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFMask(NumElems, VT.getScalarSizeInBits(), + cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFHW: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFHWMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFHWMask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = true; break; case X86ISD::PSHUFLW: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFLWMask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = true; break; case X86ISD::VZEXT_MOVL: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); - DecodeZeroMoveLowMask(VT, Mask); + DecodeZeroMoveLowMask(NumElems, Mask); IsUnary = true; break; case X86ISD::VBROADCAST: { @@ -5772,7 +5781,7 @@ // came from an extract from the original width. If we found one, we // pushed it the Ops vector above. 
if (N0.getValueType() == VT || !Ops.empty()) { - DecodeVectorBroadcast(VT, Mask); + DecodeVectorBroadcast(NumElems, Mask); IsUnary = true; break; } @@ -5785,7 +5794,7 @@ unsigned MaskEltSize = VT.getScalarSizeInBits(); SmallVector RawMask; if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) { - DecodeVPERMILPMask(VT, RawMask, Mask); + DecodeVPERMILPMask(NumElems, VT.getScalarSizeInBits(), RawMask, Mask); break; } if (auto *C = getTargetConstantFromNode(MaskNode)) { @@ -5814,35 +5823,36 @@ case X86ISD::VPERMI: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodeVPERMMask(NumElems, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: case X86ISD::MOVSD: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); - DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask); + DecodeScalarMoveMask(NumElems, /* IsLoad */ false, Mask); break; case X86ISD::VPERM2X128: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERM2X128Mask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodeVPERM2X128Mask(NumElems, cast(ImmN)->getZExtValue(), + Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); - DecodeMOVSLDUPMask(VT, Mask); + DecodeMOVSLDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::MOVSHDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); - DecodeMOVSHDUPMask(VT, Mask); + DecodeMOVSHDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::MOVDDUP: assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); - 
DecodeMOVDDUPMask(VT, Mask); + DecodeMOVDDUPMask(NumElems, Mask); IsUnary = true; break; case X86ISD::MOVLPD: @@ -5860,7 +5870,8 @@ unsigned CtrlImm = CtrlOp->getZExtValue(); SmallVector RawMask; if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) { - DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask); + DecodeVPERMIL2PMask(NumElems, VT.getScalarSizeInBits(), CtrlImm, + RawMask, Mask); break; } if (auto *C = getTargetConstantFromNode(MaskNode)) { @@ -6239,7 +6250,8 @@ MVT SrcVT = Src.getSimpleValueType(); if (NumSizeInBits != SrcVT.getSizeInBits()) break; - DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask); + DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), VT.getScalarSizeInBits(), + VT.getVectorNumElements(), Mask); Ops.push_back(Src); return true; }