Index: clang/include/clang/Basic/BuiltinsPPC.def =================================================================== --- clang/include/clang/Basic/BuiltinsPPC.def +++ clang/include/clang/Basic/BuiltinsPPC.def @@ -306,6 +306,10 @@ BUILTIN(__builtin_altivec_vclrlb, "V16cV16cUi", "") BUILTIN(__builtin_altivec_vclrrb, "V16cV16cUi", "") +// P10 Vector Shift built-ins. +BUILTIN(__builtin_altivec_vsldbi, "V16UcV16UcV16UcIi", "") +BUILTIN(__builtin_altivec_vsrdbi, "V16UcV16UcV16UcIi", "") + // VSX built-ins. BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "") Index: clang/lib/Headers/altivec.h =================================================================== --- clang/lib/Headers/altivec.h +++ clang/lib/Headers/altivec.h @@ -16830,6 +16830,62 @@ return __builtin_altivec_vclrrb((vector signed char)__a, __n); #endif } + +/* vec_sldbi */ + +#define vec_sldb(__a, __b, __c) \ + _Generic((__a), vector signed char \ + : (vector signed char)__builtin_altivec_vsldbi( \ + (vector signed char)(__a), (__b), (__c)), \ + vector unsigned char \ + : (vector unsigned char)__builtin_altivec_vsldbi( \ + (vector unsigned char)(__a), (__b), (__c)), \ + vector signed short \ + : (vector signed short)__builtin_altivec_vsldbi( \ + (vector signed short)(__a), (__b), (__c)), \ + vector unsigned short \ + : (vector unsigned short)__builtin_altivec_vsldbi( \ + (vector unsigned short)(__a), (__b), (__c)), \ + vector signed int \ + : (vector signed int)__builtin_altivec_vsldbi( \ + (vector signed int)(__a), (__b), (__c)), \ + vector unsigned int \ + : (vector unsigned int)__builtin_altivec_vsldbi( \ + (vector unsigned int)(__a), (__b), (__c)), \ + vector signed long long \ + : (vector signed long long)__builtin_altivec_vsldbi( \ + (vector signed long long)(__a), (__b), (__c)), \ + vector unsigned long long \ + : (vector unsigned long long)__builtin_altivec_vsldbi( \ + (vector unsigned long long)(__a), (__b), (__c))) + +/* vec_srdbi */ + +#define vec_srdb(__a, __b, __c) \ + _Generic((__a), vector signed char \ + : 
(vector signed char)__builtin_altivec_vsrdbi( \ + (vector signed char)(__a), (__b), (__c)), \ + vector unsigned char \ + : (vector unsigned char)__builtin_altivec_vsrdbi( \ + (vector unsigned char)(__a), (__b), (__c)), \ + vector signed short \ + : (vector signed short)__builtin_altivec_vsrdbi( \ + (vector signed short)(__a), (__b), (__c)), \ + vector unsigned short \ + : (vector unsigned short)__builtin_altivec_vsrdbi( \ + (vector unsigned short)(__a), (__b), (__c)), \ + vector signed int \ + : (vector signed int)__builtin_altivec_vsrdbi( \ + (vector signed int)(__a), (__b), (__c)), \ + vector unsigned int \ + : (vector unsigned int)__builtin_altivec_vsrdbi( \ + (vector unsigned int)(__a), (__b), (__c)), \ + vector signed long long \ + : (vector signed long long)__builtin_altivec_vsrdbi( \ + (vector signed long long)(__a), (__b), (__c)), \ + vector unsigned long long \ + : (vector unsigned long long)__builtin_altivec_vsrdbi( \ + (vector unsigned long long)(__a), (__b), (__c))) #endif /* __POWER10_VECTOR__ */ #undef __ATTRS_o_ai Index: clang/test/CodeGen/builtins-ppc-p10vector.c =================================================================== --- clang/test/CodeGen/builtins-ppc-p10vector.c +++ clang/test/CodeGen/builtins-ppc-p10vector.c @@ -5,10 +5,13 @@ #include <altivec.h> -vector signed char vsca; -vector unsigned char vuca; -vector unsigned short vusa; -vector unsigned int vuia; +vector signed char vsca, vscb; +vector unsigned char vuca, vucb; +vector signed short vssa, vssb; +vector unsigned short vusa, vusb; +vector signed int vsia, vsib; +vector unsigned int vuia, vuib; +vector signed long long vslla, vsllb; vector unsigned long long vulla, vullb; unsigned int uia; @@ -79,3 +82,113 @@ // CHECK-LE-NEXT: ret <16 x i8> return vec_clrr(vuca, uia); } + +vector signed char test_vec_sldb_sc(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_sldb(vsca, vscb, 0); +} + +vector unsigned char 
test_vec_sldb_uc(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_sldb(vuca, vucb, 0); +} + +vector signed short test_vec_sldb_ss(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_sldb(vssa, vssb, 0); +} + +vector unsigned short test_vec_sldb_us(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_sldb(vusa, vusb, 0); +} + +vector signed int test_vec_sldb_si(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_sldb(vsia, vsib, 0); +} + +vector unsigned int test_vec_sldb_ui(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_sldb(vuia, vuib, 0); +} + +vector signed long long test_vec_sldb_sll(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_sldb(vslla, vsllb, 0); +} + +vector unsigned long long test_vec_sldb_ull(void) { + // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_sldb(vulla, vullb, 0); +} + +vector signed char test_vec_srdb_sc(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_srdb(vsca, vscb, 0); +} + +vector unsigned char test_vec_srdb_uc(void) { + // 
CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_srdb(vuca, vucb, 0); +} + +vector signed short test_vec_srdb_ss(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_srdb(vssa, vssb, 0); +} + +vector unsigned short test_vec_srdb_us(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_srdb(vusa, vusb, 0); +} + +vector signed int test_vec_srdb_si(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_srdb(vsia, vsib, 0); +} + +vector unsigned int test_vec_srdb_ui(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_srdb(vuia, vuib, 0); +} + +vector signed long long test_vec_srdb_sll(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_srdb(vslla, vsllb, 0); +} + +vector unsigned long long test_vec_srdb_ull(void) { + // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_srdb(vulla, vullb, 0); +} Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -425,6 +425,14 @@ def int_ppc_altivec_vclrrb : 
GCCBuiltin<"__builtin_altivec_vclrrb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + + // P10 Vector Shift Double Bit Immediate. + def int_ppc_altivec_vsldbi : GCCBuiltin<"__builtin_altivec_vsldbi">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_altivec_vsrdbi : GCCBuiltin<"__builtin_altivec_vsrdbi">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; } // Vector average. Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -177,6 +177,26 @@ let Inst{31} = XT{5}; } +// VN-Form: [PO VRT VRA VRB PS SD XO] +// SD is "Shift Direction". +class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VRT; + bits<5> VRA; + bits<5> VRB; + bits<3> SD; + + let Pattern = pattern; + + let Inst{6-10} = VRT; + let Inst{11-15} = VRA; + let Inst{16-20} = VRB; + let Inst{21-22} = ps; + let Inst{23-25} = SD; + let Inst{26-31} = xo; +} + multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL, dag PCRel_IOL, string asmstr, InstrItinClass itin> { @@ -552,6 +572,16 @@ "vclrrb $vD, $vA, $rB", IIC_VecGeneral, [(set v16i8:$vD, (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; + def VSLDBI : + VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsldbi $VRT, $VRA, $VRB, $SH", IIC_VecGeneral, + [(set v16i8:$VRT, + (int_ppc_altivec_vsldbi v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; + def VSRDBI : + VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsrdbi $VRT, $VRA, $VRB, $SH", IIC_VecGeneral, + [(set v16i8:$VRT, + (int_ppc_altivec_vsrdbi v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; } //---------------------------- Anonymous Patterns 
----------------------------// Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s + +define <16 x i8> @testVSLDBI(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: testVSLDBI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldbi v2, v2, v3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vsldbi(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vsldbi(<16 x i8>, <16 x i8>, i32 immarg) + +define <16 x i8> @testVSRDBI(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: testVSRDBI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrdbi v2, v2, v3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vsrdbi(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vsrdbi(<16 x i8>, <16 x i8>, i32 immarg) Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt =================================================================== --- llvm/test/MC/Disassembler/PowerPC/p10insts.txt +++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -30,3 +30,9 @@ # CHECK: vclrrb 1, 4, 3 0x10 0x24 0x19 0xcd + +# CHECK: vsldbi 2, 3, 4, 5 +0x10 0x43 0x21 0x56 + +# CHECK: vsrdbi 2, 3, 4, 5 +0x10 0x43 0x23 0x56 Index: llvm/test/MC/PowerPC/p10.s =================================================================== --- llvm/test/MC/PowerPC/p10.s +++ llvm/test/MC/PowerPC/p10.s @@ -33,3 +33,9 @@ # CHECK-BE: vclrrb 1, 4, 3 # encoding: [0x10,0x24,0x19,0xcd] # 
CHECK-LE: vclrrb 1, 4, 3 # encoding: [0xcd,0x19,0x24,0x10] vclrrb 1, 4, 3 +# CHECK-BE: vsldbi 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x56] +# CHECK-LE: vsldbi 2, 3, 4, 5 # encoding: [0x56,0x21,0x43,0x10] + vsldbi 2, 3, 4, 5 +# CHECK-BE: vsrdbi 2, 3, 4, 5 # encoding: [0x10,0x43,0x23,0x56] +# CHECK-LE: vsrdbi 2, 3, 4, 5 # encoding: [0x56,0x23,0x43,0x10] + vsrdbi 2, 3, 4, 5