Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -181,6 +181,8 @@ def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64 +def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128 + def llvm_v2f16_ty : LLVMType<v2f16>; // 2 x half (__fp16) def llvm_v4f16_ty : LLVMType<v4f16>; // 4 x half (__fp16) def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16) Index: include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- include/llvm/IR/IntrinsicsPowerPC.td +++ include/llvm/IR/IntrinsicsPowerPC.td @@ -121,6 +121,13 @@ [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +/// PowerPC_Vec_QQQ_Intrinsic - A PowerPC intrinsic that takes two v1i128 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix> + : PowerPC_Vec_Intrinsic<GCCIntSuffix, + [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. // @@ -357,6 +364,7 @@ def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">; def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">; def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">; +def int_ppc_altivec_vaddcuq : PowerPC_Vec_QQQ_Intrinsic<"vaddcuq">; // Saturating subs. def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">; @@ -366,6 +374,7 @@ def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">; def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">; def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">; +def int_ppc_altivec_vsubcuq : PowerPC_Vec_QQQ_Intrinsic<"vsubcuq">; let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Saturating multiply-adds. 
@@ -540,6 +549,26 @@ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + + // Add Extended Quadword + def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + + // Sub Extended Quadword + def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; } def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">; Index: lib/IR/Function.cpp =================================================================== --- lib/IR/Function.cpp +++ lib/IR/Function.cpp @@ -548,7 +548,8 @@ IIT_HALF_VEC_ARG = 29, IIT_SAME_VEC_WIDTH_ARG = 30, IIT_PTR_TO_ARG = 31, - IIT_VEC_OF_PTRS_TO_ELT = 32 + IIT_VEC_OF_PTRS_TO_ELT = 32, + IIT_I128 = 33 }; @@ -595,6 +596,9 @@ case IIT_I64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); return; + case IIT_I128: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 128)); + return; case IIT_V1: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1)); DecodeIITType(NextElt, Infos, OutputTable); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -403,15 +403,8 @@ // will selectively turn on ones that can be effectively codegen'd. 
for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. - // This check is temporary until support for quadword add/sub is added - if (VT.SimpleTy != MVT::v1i128) { - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); - } - else { - setOperationAction(ISD::ADD , VT, Expand); - setOperationAction(ISD::SUB , VT, Expand); - } + setOperationAction(ISD::ADD , VT, Legal); + setOperationAction(ISD::SUB , VT, Legal); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -1025,12 +1025,29 @@ def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddudm $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vadduqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>; } // isCommutable +// Vector Quadword Add +def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>; +def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>; +def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>; + +// Vector Doubleword Subtract def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubudm $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; +// Vector Quadword Subtract +def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubuqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>; +def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>; +def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>; +def VSUBECUQ : VA1a_Int_Ty<63, 
"vsubecuq", int_ppc_altivec_vsubecuq, v1i128>; + // Count Leading Zeros def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), "vclzb $vD, $vB", IIC_VecGeneral, Index: test/CodeGen/PowerPC/ppc64-i128-abi.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -12,46 +12,46 @@ ; VSX: ; %a is passed in register 34 -; On LE, ensure %a is swapped before being used (using xxswapd) -; Similarly, on LE ensure the results are swapped before being returned in -; register 34 +; The value of 1 is stored in the TOC. +; On LE, ensure the value of 1 is swapped before being used (using xxswapd). ; VMX (no VSX): ; %a is passed in register 2 -; No swaps are necessary on LE +; The value of 1 is stored in the TOC. +; No swaps are necessary when using P8 Vector instructions on LE define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind { %tmp = add <1 x i128> %a, <i128 1> ret <1 x i128> %tmp +; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There +; should be a better way of doing this. 
+ ; CHECK-LE-LABEL: @v1i128_increment_by_one -; CHECK-LE: xxswapd [[PARAM1:[0-9]+]], 34 -; CHECK-LE: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: xxswapd 34, [[RESULT]] +; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: xxswapd 35, [[VAL]] +; CHECK-LE: vadduqm 2, 2, 3 ; CHECK-LE: blr ; CHECK-BE-LABEL: @v1i128_increment_by_one -; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34 -; CHECK-BE: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}} -; CHECK-BE: lxvd2x 34, {{[0-9]+}}, {{[0-9]+}} +; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}} +; CHECK-BE-NOT: xxswapd +; CHECK-BE: vadduqm 2, 2, 3 ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}} ; CHECK-BE: blr ; CHECK-NOVSX-LABEL: @v1i128_increment_by_one ; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}} ; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} ; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOVSX: vadduqm 2, 2, [[VAL]] ; CHECK-NOVSX: blr } ; VSX: ; %a is passed in register 34 ; %b is passed in register 35 -; On LE, ensure the contents of 34 and 35 are swapped before being used -; Similarly, on LE ensure the results are swapped before being returned in -; register 34 +; No swaps are necessary when using P8 Vector instructions on LE ; VMX (no VSX): ; %a is passewd in register 2 ; %b is passed in register 3 @@ -62,30 +62,20 @@ ret <1 x i128> %tmp ; CHECK-LE-LABEL: @v1i128_increment_by_val -; CHECK-LE-DAG: xxswapd [[PARAM1:[0-9]+]], 34 -; CHECK-LE-DAG: xxswapd [[PARAM2:[0-9]+]], 35 -; CHECK-LE-DAG: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE-DAG: stxvd2x [[PARAM2]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: xxswapd 34, [[RESULT]] +; CHECK-LE-NOT: xxswapd +; CHECK-LE: 
adduqm 2, 2, 3 ; CHECK-LE: blr ; CHECK-BE-LABEL: @v1i128_increment_by_val ; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34 ; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35 -; CHECK-BE-DAG: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}} -; CHECK-BE-DAG: stxvd2x 35, {{[0-9]+}}, {{[0-9]+}} -; CHECK-BE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK-BE-NOT: xxswapd 34, [[RESULT]] +; CHECK-BE: adduqm 2, 2, 3 ; CHECK-BE: blr ; CHECK-NOVSX-LABEL: @v1i128_increment_by_val -; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX-DAG: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX-DAG: stvx 3, {{[0-9]+}}, {{[0-9]+}} -; CHECK-NOVSX: lvx [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK-NOVSX-NOT: xxswapd 34, [[RESULT]] +; CHECK-NOVSX: adduqm 2, 2, 3 ; CHECK-NOVSX: blr } Index: test/CodeGen/PowerPC/vec_add_sub_quadword.ll =================================================================== --- test/CodeGen/PowerPC/vec_add_sub_quadword.ll +++ test/CodeGen/PowerPC/vec_add_sub_quadword.ll @@ -0,0 +1,130 @@ +; Check VMX 128-bit integer operations +; +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +define <1 x i128> @test_add(<1 x i128> %x, <1 x i128> %y) nounwind { + %result = add <1 x i128> %x, %y + ret <1 x i128> %result +; CHECK-LABEL: @test_add +; CHECK: vadduqm 2, 2, 3 +} + +define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind { + %result = add <1 x i128> %x, <i128 1> + ret <1 x i128> %result +; CHECK-LABEL: @increment_by_one +; CHECK vadduqm 2, 2, 3 +} + +define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind { + %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0 + %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1 + %result = add <1 x i128> %x, %tmpvec2 + ret <1 x i128> %result +; CHECK-LABEL: @increment_by_val +; CHECK: vadduqm 2, 2, 3 +} + +define <1 x i128> @test_sub(<1 x i128> %x, <1 
x i128> %y) nounwind { + %result = sub <1 x i128> %x, %y + ret <1 x i128> %result +; CHECK-LABEL: @test_sub +; CHECK: vsubuqm 2, 2, 3 +} + +define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind { + %result = sub <1 x i128> %x, <i128 1> + ret <1 x i128> %result +; CHECK-LABEL: @decrement_by_one +; CHECK vsubuqm 2, 2, 3 +} + +define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind { + %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0 + %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1 + %result = sub <1 x i128> %x, %tmpvec2 + ret <1 x i128> %result +; CHECK-LABEL: @decrement_by_val +; CHECK vsubuqm 2, 2, 3 +} + +declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x, + <1 x i128> %y) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x, + <1 x i128> %y) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind readnone + +define <1 x i128> @test_vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: @test_vaddeuqm +; CHECK: vaddeuqm 2, 2, 3, 4 +} + +define <1 x i128> @test_vaddcuq(<1 x i128> %x, + <1 x i128> %y) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x, + <1 x i128> %y) + ret <1 x i128> %tmp +; CHECK-LABEL: @test_vaddcuq +; CHECK: vaddcuq 2, 2, 3 +} + +define <1 x i128> @test_vaddecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x, + 
<1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: @test_vaddecuq +; CHECK: vaddecuq 2, 2, 3, 4 +} + +define <1 x i128> @test_vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubeuqm +; CHECK: vsubeuqm 2, 2, 3, 4 +} + +define <1 x i128> @test_vsubcuq(<1 x i128> %x, + <1 x i128> %y) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x, + <1 x i128> %y) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubcuq +; CHECK: vsubcuq 2, 2, 3 +} + +define <1 x i128> @test_vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) nounwind { + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x, + <1 x i128> %y, + <1 x i128> %z) + ret <1 x i128> %tmp +; CHECK-LABEL: test_vsubecuq +; CHECK: vsubecuq 2, 2, 3, 4 +} + Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -207,6 +207,18 @@ # CHECK: vadduws 2, 3, 4 0x10 0x43 0x22 0x80 +# CHECK: vadduqm 2, 3, 4 +0x10 0x43 0x21 0x00 + +# CHECK: vaddeuqm 2, 3, 4, 5 +0x10 0x43 0x21 0x7c + +# CHECK: vaddcuq 2, 3, 4 +0x10 0x43 0x21 0x40 + +# CHECK: vaddecuq 2, 3, 4, 5 +0x10 0x43 0x21 0x7d + # CHECK: vsubcuw 2, 3, 4 0x10 0x43 0x25 0x80 @@ -240,6 +252,18 @@ # CHECK: vsubuws 2, 3, 4 0x10 0x43 0x26 0x80 +# CHECK: vsubuqm 2, 3, 4 +0x10 0x43 0x25 0x00 + +# CHECK: vsubeuqm 2, 3, 4, 5 +0x10 0x43 0x21 0x7e + +# CHECK: vsubcuq 2, 3, 4 +0x10 0x43 0x25 0x40 + +# CHECK: vsubecuq 2, 3, 4, 5 +0x10 0x43 0x21 0x7f + # CHECK: vmulesb 2, 3, 4 0x10 0x43 0x23 0x08 Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ 
test/MC/PowerPC/ppc64-encoding-vmx.s @@ -237,7 +237,19 @@ # CHECK-BE: vadduws 2, 3, 4 # encoding: [0x10,0x43,0x22,0x80] # CHECK-LE: vadduws 2, 3, 4 # encoding: [0x80,0x22,0x43,0x10] vadduws 2, 3, 4 - +# CHECK-BE: vadduqm 2, 3, 4 # encoding: [0x10,0x43,0x21,0x00] +# CHECK-LE: vadduqm 2, 3, 4 # encoding: [0x00,0x21,0x43,0x10] + vadduqm 2, 3, 4 +# CHECK-BE: vaddeuqm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7c] +# CHECK-LE: vaddeuqm 2, 3, 4, 5 # encoding: [0x7c,0x21,0x43,0x10] + vaddeuqm 2, 3, 4, 5 +# CHECK-BE: vaddcuq 2, 3, 4 # encoding: [0x10,0x43,0x21,0x40] +# CHECK-LE: vaddcuq 2, 3, 4 # encoding: [0x40,0x21,0x43,0x10] + vaddcuq 2, 3, 4 +# CHECK-BE: vaddecuq 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7d] +# CHECK-LE: vaddecuq 2, 3, 4, 5 # encoding: [0x7d,0x21,0x43,0x10] + vaddecuq 2, 3, 4, 5 + # CHECK-BE: vsubcuw 2, 3, 4 # encoding: [0x10,0x43,0x25,0x80] # CHECK-LE: vsubcuw 2, 3, 4 # encoding: [0x80,0x25,0x43,0x10] vsubcuw 2, 3, 4 @@ -271,7 +283,19 @@ # CHECK-BE: vsubuws 2, 3, 4 # encoding: [0x10,0x43,0x26,0x80] # CHECK-LE: vsubuws 2, 3, 4 # encoding: [0x80,0x26,0x43,0x10] vsubuws 2, 3, 4 - +# CHECK-BE: vsubuqm 2, 3, 4 # encoding: [0x10,0x43,0x25,0x00] +# CHECK-LE: vsubuqm 2, 3, 4 # encoding: [0x00,0x25,0x43,0x10] + vsubuqm 2, 3, 4 +# CHECK-BE: vsubeuqm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7e] +# CHECK-LE: vsubeuqm 2, 3, 4, 5 # encoding: [0x7e,0x21,0x43,0x10] + vsubeuqm 2, 3, 4, 5 +# CHECK-BE: vsubcuq 2, 3, 4 # encoding: [0x10,0x43,0x25,0x40] +# CHECK-LE: vsubcuq 2, 3, 4 # encoding: [0x40,0x25,0x43,0x10] + vsubcuq 2, 3, 4 +# CHECK-BE: vsubecuq 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7f] +# CHECK-LE: vsubecuq 2, 3, 4, 5 # encoding: [0x7f,0x21,0x43,0x10] + vsubecuq 2, 3, 4, 5 + # CHECK-BE: vmulesb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x08] # CHECK-LE: vmulesb 2, 3, 4 # encoding: [0x08,0x23,0x43,0x10] vmulesb 2, 3, 4 Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp 
+++ utils/TableGen/IntrinsicEmitter.cpp @@ -260,7 +260,8 @@ IIT_HALF_VEC_ARG = 29, IIT_SAME_VEC_WIDTH_ARG = 30, IIT_PTR_TO_ARG = 31, - IIT_VEC_OF_PTRS_TO_ELT = 32 + IIT_VEC_OF_PTRS_TO_ELT = 32, + IIT_I128 = 33 }; @@ -275,6 +276,7 @@ case 16: return Sig.push_back(IIT_I16); case 32: return Sig.push_back(IIT_I32); case 64: return Sig.push_back(IIT_I64); + case 128: return Sig.push_back(IIT_I128); } }