Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp +++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp @@ -1018,7 +1018,8 @@ } static std::string getShuffleComment(const MachineOperand &DstOp, - const MachineOperand &SrcOp, + const MachineOperand &SrcOp1, + const MachineOperand &SrcOp2, ArrayRef Mask) { std::string Comment; @@ -1032,39 +1033,49 @@ }; StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem"; - StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem"; + StringRef Src1Name = + SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem"; + StringRef Src2Name = + SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem"; + + // One source operand, fix the mask to print all elements in one span. + SmallVector ShuffleMask(Mask.begin(), Mask.end()); + if (Src1Name == Src2Name) + for (int i = 0, e = ShuffleMask.size(); i != e; ++i) + if (ShuffleMask[i] >= e) + ShuffleMask[i] -= e; raw_string_ostream CS(Comment); CS << DstName << " = "; - bool NeedComma = false; - bool InSrc = false; - for (int M : Mask) { - // Wrap up any prior entry... - if (M == SM_SentinelZero && InSrc) { - InSrc = false; - CS << "]"; - } - if (NeedComma) + for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { + if (i != 0) CS << ","; - else - NeedComma = true; - - // Print this shuffle... - if (M == SM_SentinelZero) { + if (ShuffleMask[i] == SM_SentinelZero) { CS << "zero"; - } else { - if (!InSrc) { - InSrc = true; - CS << SrcName << "["; - } - if (M == SM_SentinelUndef) + continue; + } + + // Otherwise, it must come from src1 or src2. Print the span of elements + // that comes from this src. + bool isSrc1 = ShuffleMask[i] < (int)e; + CS << (isSrc1 ? 
Src1Name : Src2Name) << '['; + + bool IsFirst = true; + while (i != e && ShuffleMask[i] != SM_SentinelZero && + (ShuffleMask[i] < (int)e) == isSrc1) { + if (!IsFirst) + CS << ','; + else + IsFirst = false; + if (ShuffleMask[i] == SM_SentinelUndef) CS << "u"; else - CS << M; + CS << ShuffleMask[i] % (int)e; + ++i; } + CS << ']'; + --i; // For loop increments element #. } - if (InSrc) - CS << "]"; CS.flush(); return Comment; @@ -1313,7 +1324,7 @@ SmallVector Mask; DecodePSHUFBMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); } break; } @@ -1340,7 +1351,25 @@ SmallVector Mask; DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask)); + } + break; + } + case X86::VPPERMrrm: { + if (!OutStreamer->isVerboseAsm()) + break; + assert(MI->getNumOperands() > 6 && + "We should always have at least 6 operands!"); + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &SrcOp1 = MI->getOperand(1); + const MachineOperand &SrcOp2 = MI->getOperand(2); + const MachineOperand &MaskOp = MI->getOperand(6); + + if (auto *C = getConstantFromPool(*MI, MaskOp)) { + SmallVector Mask; + DecodeVPPERMMask(C, Mask); + if (!Mask.empty()) + OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask)); } break; } Index: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h +++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h @@ -32,6 +32,9 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, SmallVectorImpl &ShuffleMask); +/// Decode a VPPERM variable mask from an IR-level vector constant. 
+void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask); + /// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant. void DecodeVPERMVMask(const Constant *C, MVT VT, SmallVectorImpl<int> &ShuffleMask); Index: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -153,6 +153,74 @@ // TODO: Handle funny-looking vectors too. } +void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + assert(MaskTy->getPrimitiveSizeInBits() == 128); + + // Only support vector types. + if (!MaskTy->isVectorTy()) + return; + + // Make sure it's an integer type. + Type *VecEltTy = MaskTy->getVectorElementType(); + if (!VecEltTy->isIntegerTy()) + return; + + // The shuffle mask requires a byte vector - decode cases with + // wider elements as well. + unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth(); + if ((BitWidth % 8) != 0) + return; + + int NumElts = MaskTy->getVectorNumElements(); + int Scale = BitWidth / 8; + int NumBytes = NumElts * Scale; + ShuffleMask.reserve(NumBytes); + + for (int i = 0; i != NumElts; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa<UndefValue>(COp)) { + ShuffleMask.append(Scale, SM_SentinelUndef); + continue; + } + + // VPPERM Operation + // Bits[4:0] - Byte Index (0 - 31) + // Bits[7:5] - Permute Operation + // + // Permute Operation: + // 0 - Source byte (no logical operation). + // 1 - Invert source byte. + // 2 - Bit reverse of source byte. + // 3 - Bit reverse of inverted source byte. + // 4 - 00h (zero - fill). + // 5 - FFh (ones - fill). + // 6 - Most significant bit of source byte replicated in all bit positions. + // 7 - Invert most significant bit of source byte and replicate in all bit positions. 
+ APInt MaskElt = cast<ConstantInt>(COp)->getValue(); + for (int j = 0; j != Scale; ++j) { + APInt Index = MaskElt.getLoBits(5); + APInt PermuteOp = MaskElt.lshr(5).getLoBits(3); + MaskElt = MaskElt.lshr(8); + + if (PermuteOp == 4) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + if (PermuteOp != 0) { + ShuffleMask.clear(); + return; + } + ShuffleMask.push_back((int)Index.getZExtValue()); + } + } + + assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size"); +} + void DecodeVPERMVMask(const Constant *C, MVT VT, SmallVectorImpl<int> &ShuffleMask) { Type *MaskTy = C->getType(); Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -13,8 +13,8 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_identity: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> ) %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> ) @@ -24,7 +24,7 @@ define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> ) ret <16 x i8> %res0 @@ -33,7 +33,7 @@ define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 
x i8> %a1) { ; CHECK-LABEL: combine_vpperm_as_unpckhwd: ; CHECK: # BB#0: -; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> ) ret <16 x i8> %res0