Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -441,6 +441,8 @@ VPSHA, VPSHL, // XOP signed/unsigned integer comparisons VPCOM, VPCOMU, + // XOP packed permute bytes + VPPERM, // Vector multiply packed unsigned doubleword integers PMULUDQ, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -21514,6 +21514,7 @@ case X86ISD::VSRAI: return "X86ISD::VSRAI"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; + case X86ISD::VPPERM: return "X86ISD::VPPERM"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -251,6 +251,10 @@ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; +def X86vpperm : SDNode<"X86ISD::VPPERM", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; + def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; Index: llvm/trunk/lib/Target/X86/X86InstrXOP.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrXOP.td +++ llvm/trunk/lib/Target/X86/X86InstrXOP.td @@ -222,8 +222,47 @@ defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>; } +multiclass xop4op opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { + def rr : IXOPi8, + XOP_4V, VEX_I8IMM; + def rm : IXOPi8, + XOP_4V, VEX_I8IMM, VEX_W, MemOp4; + def mr : IXOPi8, + XOP_4V, VEX_I8IMM; + // For disassembler + let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def rr_REV : IXOPi8, XOP_4V, VEX_I8IMM, VEX_W, MemOp4; +} + +let ExeDomain = SSEPackedInt in { + defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8>; +} + // Instruction where either second or third source can be memory -multiclass xop4op opc, string OpcodeStr, Intrinsic Int> { +multiclass xop4op_int opc, string OpcodeStr, Intrinsic Int> { def rr : IXOPi8; - defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>; + defm VPCMOV : xop4op_int<0xA2, "vpcmov", int_x86_xop_vpcmov>; } multiclass xop4op256 opc, string OpcodeStr, Intrinsic Int> { Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -2278,6 +2278,7 @@ X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0), X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0), X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0), + X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0), X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0), X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0), X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0), Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -21,11 +21,20 @@ ret <16 x i8> %res1 } -define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { -; CHECK-LABEL: combine_vpperm_as_unpckhwd: +define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd: ; CHECK: # BB#0: ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> ) ret <16 x i8> %res0 } + +define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: combine_vpperm_as_unpckhwd: +; CHECK: # BB#0: +; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> ) + ret <16 x i8> %res0 +}