Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -181,6 +181,8 @@ def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64 +def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128 + def llvm_v2f16_ty : LLVMType<v2f16>; // 2 x half (__fp16) def llvm_v4f16_ty : LLVMType<v4f16>; // 4 x half (__fp16) def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16) Index: include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- include/llvm/IR/IntrinsicsPowerPC.td +++ include/llvm/IR/IntrinsicsPowerPC.td @@ -120,6 +120,13 @@ [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +/// PowerPC_Vec_QQQ_Intrinsic - A PowerPC intrinsic that takes two v1i128 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix> + : PowerPC_Vec_Intrinsic<GCCIntSuffix, + [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. // @@ -356,6 +363,7 @@ def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">; def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">; def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">; +def int_ppc_altivec_vaddcuq : PowerPC_Vec_QQQ_Intrinsic<"vaddcuq">; // Saturating subs. def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">; @@ -365,6 +373,7 @@ def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">; def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">; def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">; +def int_ppc_altivec_vsubcuq : PowerPC_Vec_QQQ_Intrinsic<"vsubcuq">; let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Saturating multiply-adds. 
@@ -525,6 +534,26 @@ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + + // Add Extended Quadword + def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + + // Sub Extended Quadword + def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; } def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">; Index: lib/IR/Function.cpp =================================================================== --- lib/IR/Function.cpp +++ lib/IR/Function.cpp @@ -554,7 +554,8 @@ IIT_HALF_VEC_ARG = 29, IIT_SAME_VEC_WIDTH_ARG = 30, IIT_PTR_TO_ARG = 31, - IIT_VEC_OF_PTRS_TO_ELT = 32 + IIT_VEC_OF_PTRS_TO_ELT = 32, + IIT_I128 = 33 }; @@ -601,6 +602,9 @@ case IIT_I64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); return; + case IIT_I128: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 128)); + return; case IIT_V1: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1)); DecodeIITType(NextElt, Infos, OutputTable); Index: lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- lib/Target/PowerPC/PPCCallingConv.td +++ lib/Target/PowerPC/PPCCallingConv.td @@ -62,7 +62,7 @@ // Vector types returned as "direct" go into V2 .. 
V9; note that only the // ELFv2 ABI fully utilizes all these registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> @@ -114,7 +114,8 @@ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> @@ -172,9 +173,9 @@ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>, // The first 12 Vector arguments are passed in AltiVec registers. 
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, - V10, V11, V12, V13]>>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32], + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, + V8, V9, V10, V11, V12, V13]>>>, CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, VSH10, VSH11, VSH12, VSH13]>>>, Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -406,7 +406,7 @@ setOperationAction(ISD::SUB , VT, Legal); // Vector instructions introduced in P8 - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); } @@ -620,8 +620,10 @@ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } - if (Subtarget.hasP8Altivec()) + if (Subtarget.hasP8Altivec()) { addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); + addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); + } } if (Subtarget.hasQPX()) { @@ -2472,7 +2474,8 @@ // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || - ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || + ArgVT == MVT::v1i128) Align = 16; // QPX vector types stored in double-precision are padded to a 32 byte // boundary. 
@@ -2551,7 +2554,8 @@ } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || - ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || + ArgVT == MVT::v1i128) if (AvailableVRs > 0) { --AvailableVRs; return false; @@ -3130,6 +3134,7 @@ case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous @@ -4604,6 +4609,7 @@ case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + case MVT::v1i128: if (++NumVRsUsed <= NumVRs) continue; break; @@ -4849,6 +4855,7 @@ case MVT::i1: case MVT::i32: case MVT::i64: + case MVT::i128: // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -4966,6 +4973,7 @@ case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + case MVT::v1i128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. 
The latter are used to implement ELFv2 homogenous Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -802,26 +802,37 @@ def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>; + +def : Pat<(v1i128 (bitconvert (v16i8 
VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>; // Shuffles. @@ -987,12 +998,29 @@ def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddudm $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vadduqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>; } // isCommutable +// Vector Quadword Add +def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>; +def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>; +def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>; + +// Vector Doubleword Subtract def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubudm $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; +// Vector Quadword Subtract +def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubuqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>; +def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>; +def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>; +def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>; + // Count Leading Zeros def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), "vclzb $vD, $vB", IIC_VecGeneral, Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -875,6 +875,11 @@ def : Pat<(v2i64 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; +def : 
Pat<(v2f64 (bitconvert v1i128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v1i128 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + // sign extension patterns // To extend "in place" from v2i32 to v2i64, we have input data like: // | undef | i32 | undef | i32 | Index: lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -288,7 +288,7 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; -def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128, +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128, (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; Index: test/CodeGen/PowerPC/vec_add_sub_quadword.ll =================================================================== --- test/CodeGen/PowerPC/vec_add_sub_quadword.ll +++ test/CodeGen/PowerPC/vec_add_sub_quadword.ll @@ -0,0 +1,124 @@ +; Check VMX 128-bit integer operations +; +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +define <1 x i128> @test_add(<1 x i128> %x, <1 x i128> %y) nounwind { + %result = add <1 x i128> %x, %y + ret <1 x i128> %result +; CHECK: vadduqm 2, 2, 3 +} + +define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind { + %result = add <1 x i128> %x, <i128 1> + ret <1 x i128> %result +; CHECK vadduqm 2, 2, 3* +} + +define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind { + %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0 + %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1 + %result = add <1 x i128> %x, %tmpvec2 + ret <1 x i128> %result +; CHECK: vadduqm 2, 2, 3 +} + +define <1 x i128> @test_sub(<1 x i128> %x, <1 x i128> %y) 
nounwind { + %result = sub <1 x i128> %x, %y + ret <1 x i128> %result +; CHECK: vsubuqm 2, 2, 3 +} + +define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind { + %result = sub <1 x i128> %x, <i128 1> + ret <1 x i128> %result +; CHECK vsubuqm 2, 2, 3 +} + +define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind { + %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0 + %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1 + %result = sub <1 x i128> %x, %tmpvec2 + ret <1 x i128> %result +; CHECK vsubuqm 2, 2, 3 +} + +declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x, + <1 x i128> %y) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x, + <1 x i128> %y) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone + +define <1 x i128> @test_vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vaddeuqm +; CHECK: vaddeuqm 2, 2, 3, 4 +} + +define <1 x i128> @test_vaddcuq(<1 x i128> %x, + <1 x i128> %y) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x, + <1 x i128> %y) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vaddcuq +; CHECK: vaddcuq 2, 2, 3 +} + +define <1 x i128> @test_vaddecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vaddecuq +; CHECK: vaddecuq 2, 2, 
3, 4 +} + +define <1 x i128> @test_vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubeuqm +; CHECK: vsubeuqm 2, 2, 3, 4 +} + +define <1 x i128> @test_vsubcuq(<1 x i128> %x, + <1 x i128> %y) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x, + <1 x i128> %y) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubcuq +; CHECK: vsubcuq 2, 2, 3 +} + +define <1 x i128> @test_vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubecuq +; CHECK: vsubecuq 2, 2, 3, 4 +} + Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -207,6 +207,18 @@ # CHECK: vadduws 2, 3, 4 0x10 0x43 0x22 0x80 +# CHECK: vadduqm 2, 3, 4 +0x10 0x43 0x21 0x00 + +# CHECK: vaddeuqm 2, 3, 4, 5 +0x10 0x43 0x21 0x7c + +# CHECK: vaddcuq 2, 3, 4 +0x10 0x43 0x21 0x40 + +# CHECK: vaddecuq 2, 3, 4, 5 +0x10 0x43 0x21 0x7d + # CHECK: vsubcuw 2, 3, 4 0x10 0x43 0x25 0x80 @@ -240,6 +252,18 @@ # CHECK: vsubuws 2, 3, 4 0x10 0x43 0x26 0x80 +# CHECK: vsubuqm 2, 3, 4 +0x10 0x43 0x25 0x00 + +# CHECK: vsubeuqm 2, 3, 4, 5 +0x10 0x43 0x21 0x7e + +# CHECK: vsubcuq 2, 3, 4 +0x10 0x43 0x25 0x40 + +# CHECK: vsubecuq 2, 3, 4, 5 +0x10 0x43 0x21 0x7f + # CHECK: vmulesb 2, 3, 4 0x10 0x43 0x23 0x08 Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ test/MC/PowerPC/ppc64-encoding-vmx.s @@ -237,7 +237,19 @@ # CHECK-BE: vadduws 2, 3, 4 # encoding: [0x10,0x43,0x22,0x80] # CHECK-LE: vadduws 2, 
3, 4 # encoding: [0x80,0x22,0x43,0x10] vadduws 2, 3, 4 - +# CHECK-BE: vadduqm 2, 3, 4 # encoding: [0x10,0x43,0x21,0x00] +# CHECK-LE: vadduqm 2, 3, 4 # encoding: [0x00,0x21,0x43,0x10] + vadduqm 2, 3, 4 +# CHECK-BE: vaddeuqm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7c] +# CHECK-LE: vaddeuqm 2, 3, 4, 5 # encoding: [0x7c,0x21,0x43,0x10] + vaddeuqm 2, 3, 4, 5 +# CHECK-BE: vaddcuq 2, 3, 4 # encoding: [0x10,0x43,0x21,0x40] +# CHECK-LE: vaddcuq 2, 3, 4 # encoding: [0x40,0x21,0x43,0x10] + vaddcuq 2, 3, 4 +# CHECK-BE: vaddecuq 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7d] +# CHECK-LE: vaddecuq 2, 3, 4, 5 # encoding: [0x7d,0x21,0x43,0x10] + vaddecuq 2, 3, 4, 5 + # CHECK-BE: vsubcuw 2, 3, 4 # encoding: [0x10,0x43,0x25,0x80] # CHECK-LE: vsubcuw 2, 3, 4 # encoding: [0x80,0x25,0x43,0x10] vsubcuw 2, 3, 4 @@ -271,7 +283,19 @@ # CHECK-BE: vsubuws 2, 3, 4 # encoding: [0x10,0x43,0x26,0x80] # CHECK-LE: vsubuws 2, 3, 4 # encoding: [0x80,0x26,0x43,0x10] vsubuws 2, 3, 4 - +# CHECK-BE: vsubuqm 2, 3, 4 # encoding: [0x10,0x43,0x25,0x00] +# CHECK-LE: vsubuqm 2, 3, 4 # encoding: [0x00,0x25,0x43,0x10] + vsubuqm 2, 3, 4 +# CHECK-BE: vsubeuqm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7e] +# CHECK-LE: vsubeuqm 2, 3, 4, 5 # encoding: [0x7e,0x21,0x43,0x10] + vsubeuqm 2, 3, 4, 5 +# CHECK-BE: vsubcuq 2, 3, 4 # encoding: [0x10,0x43,0x25,0x40] +# CHECK-LE: vsubcuq 2, 3, 4 # encoding: [0x40,0x25,0x43,0x10] + vsubcuq 2, 3, 4 +# CHECK-BE: vsubecuq 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7f] +# CHECK-LE: vsubecuq 2, 3, 4, 5 # encoding: [0x7f,0x21,0x43,0x10] + vsubecuq 2, 3, 4, 5 + # CHECK-BE: vmulesb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x08] # CHECK-LE: vmulesb 2, 3, 4 # encoding: [0x08,0x23,0x43,0x10] vmulesb 2, 3, 4 Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -260,7 +260,8 @@ IIT_HALF_VEC_ARG = 29, IIT_SAME_VEC_WIDTH_ARG = 30, IIT_PTR_TO_ARG = 31, - 
IIT_VEC_OF_PTRS_TO_ELT = 32 + IIT_VEC_OF_PTRS_TO_ELT = 32, + IIT_I128 = 33 }; @@ -275,6 +276,7 @@ case 16: return Sig.push_back(IIT_I16); case 32: return Sig.push_back(IIT_I32); case 64: return Sig.push_back(IIT_I64); + case 128: return Sig.push_back(IIT_I128); } }