Index: include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- include/llvm/IR/IntrinsicsPowerPC.td +++ include/llvm/IR/IntrinsicsPowerPC.td @@ -608,6 +608,11 @@ def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; } def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">; Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -1142,7 +1142,9 @@ def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB), (VPKUDUM $vB, $vA)>; - +def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>; +def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq, + v2i64, v16i8>; } // end HasP8Altivec // Crypto instructions (from builtins) Index: test/CodeGen/PowerPC/builtins-ppc-p8vector.ll =================================================================== --- test/CodeGen/PowerPC/builtins-ppc-p8vector.ll +++ test/CodeGen/PowerPC/builtins-ppc-p8vector.ll @@ -0,0 +1,91 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX + +@vsc = global <16 x i8> , align 16 +@vuc = global <16 x i8> , align 16 +@res_vll = common global <2 x i64> zeroinitializer, align 16 +@res_vull = common global <2 x i64> zeroinitializer, align 16 +@res_vsc = common global <16 x i8> zeroinitializer, align 16 +@res_vuc = common global <16 x i8> zeroinitializer, align 16 + +; Function Attrs: nounwind +define void @test1() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %__b.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vsc, align 16 + %1 = load <16 x i8>, <16 x i8>* @vsc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16 + %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16 + %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3) + store <2 x i64> %4, <2 x i64>* @res_vll, align 16 + ret void +; CHECK-LABEL: @test1 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: lvx [[REG2:[0-9]+]], +; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]] +; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test2() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %__b.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vuc, align 16 + %1 = load <16 x i8>, <16 x i8>* @vuc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16 + %2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16 + %4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3) + store <2 x i64> %4, <2 x i64>* @res_vull, align 16 + ret void +; CHECK-LABEL: @test2 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: lvx [[REG2:[0-9]+]], +; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]] +; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test3() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vsc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @res_vsc, align 16 + ret void +; CHECK-LABEL: @test3 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: vgbbd {{[0-9]+}}, [[REG1]] +; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind +define void @test4() { +entry: + %__a.addr.i = alloca <16 x i8>, align 16 + %0 = load <16 x i8>, <16 x i8>* @vuc, align 16 + store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16 + %1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @res_vuc, align 16 + ret void +; CHECK-LABEL: @test4 +; CHECK: lvx [[REG1:[0-9]+]], +; CHECK: vgbbd {{[0-9]+}}, [[REG1]] +; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}} +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8>, <16 x i8>) + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8>) Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -630,6 +630,12 @@ # CHECK: vrsqrtefp 2, 3 0x10 0x40 0x19 0x4a +# CHECK: vgbbd 2, 3 +0x10 0x40 0x1d 0x0c + +# CHECK: vbpermq 2, 5, 17 +0x10 0x45 0x8d 0x4c + # CHECK: vclzb 2, 3 0x10 0x40 0x1f 0x02 Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ test/MC/PowerPC/ppc64-encoding-vmx.s @@ -686,6 +686,12 @@ # CHECK-BE: vrsqrtefp 2, 3 # encoding: [0x10,0x40,0x19,0x4a] # CHECK-LE: vrsqrtefp 2, 3 # encoding: [0x4a,0x19,0x40,0x10] vrsqrtefp 2, 3 +# CHECK-BE: vgbbd 2, 3 # encoding: [0x10,0x40,0x1d,0x0c] +# CHECK-LE: vgbbd 2, 3 # encoding: [0x0c,0x1d,0x40,0x10] + vgbbd 2, 3 +# CHECK-BE: vbpermq 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x4c] +# CHECK-LE: vbpermq 2, 5, 17 # encoding: [0x4c,0x8d,0x45,0x10] + vbpermq 2, 5, 17 # Vector count leading zero instructions # CHECK-BE: vclzb 2, 3 # encoding: [0x10,0x40,0x1f,0x02]