Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -306,6 +306,10 @@
 BUILTIN(__builtin_altivec_vclrlb, "V16cV16cUi", "")
 BUILTIN(__builtin_altivec_vclrrb, "V16cV16cUi", "")
 
+// P10 Vector insert with immediate built-ins.
+BUILTIN(__builtin_altivec_vinsw, "V4UiULLiIi", "")
+BUILTIN(__builtin_altivec_vinsd, "V2ULLiULLiIi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16830,6 +16830,88 @@
   return __builtin_altivec_vclrrb((vector signed char)__a, __n);
 #endif
 }
+
+/* vec_replace */
+
+#ifdef __LITTLE_ENDIAN__
+#define vec_replace_elt(__a, __b, __c)                                        \
+  _Generic(                                                                   \
+      (__b), signed int                                                       \
+      : (vector signed int)__builtin_altivec_vinsw((signed int)(__b),         \
+                                                   (12 - ((__c)*4))),         \
+      unsigned int                                                            \
+      : (vector unsigned int)__builtin_altivec_vinsw((unsigned int)(__b),     \
+                                                     (12 - ((__c)*4))),       \
+      float                                                                   \
+      : (vector float)__builtin_altivec_vinsw((float)(__b), (12 - ((__c)*4))),\
+      signed long long                                                        \
+      : (vector signed long long)__builtin_altivec_vinsd(                     \
+            (signed long long)(__b), (8 - ((__c)*8))),                        \
+      unsigned long long                                                      \
+      : (vector unsigned long long)__builtin_altivec_vinsd(                   \
+            (unsigned long long)(__b), (8 - ((__c)*8))),                      \
+      double                                                                  \
+      : (vector double)__builtin_altivec_vinsd((double)(__b),                 \
+                                               (8 - ((__c)*8))))
+
+#define vec_replace_unaligned(__a, __b, __c)                                  \
+  _Generic((__b), signed int                                                  \
+           : (vector unsigned char)__builtin_altivec_vinsw((signed int)(__b), \
+                                                           (12 - (__c))),     \
+           unsigned int                                                       \
+           : (vector unsigned char)__builtin_altivec_vinsw(                   \
+                 (unsigned int)(__b), (12 - (__c))),                          \
+           float                                                              \
+           : (vector unsigned char)__builtin_altivec_vinsw((float)(__b),      \
+                                                           (12 - (__c))),     \
+           signed long long                                                   \
+           : (vector unsigned char)__builtin_altivec_vinsd(                   \
+                 (signed long long)(__b), (8 - (__c))),                       \
+           unsigned long long                                                 \
+           : (vector unsigned char)__builtin_altivec_vinsd(                   \
+                 (unsigned long long)(__b), (8 - (__c))),                     \
+           double                                                             \
+           : (vector unsigned char)__builtin_altivec_vinsd((double)(__b),     \
+                                                           (8 - (__c))))
+
+#else
+#define vec_replace_elt(__a, __b, __c)                                        \
+  _Generic((__b), signed int                                                  \
+           : (vector signed int)__builtin_altivec_vinsw((signed int)(__b),    \
+                                                        ((__c)*4)),           \
+           unsigned int                                                       \
+           : (vector unsigned int)__builtin_altivec_vinsw((unsigned int)(__b),\
+                                                          ((__c)*4)),         \
+           float                                                              \
+           : (vector float)__builtin_altivec_vinsw((float)(__b), ((__c)*4)),  \
+           signed long long                                                   \
+           : (vector signed long long)__builtin_altivec_vinsd(                \
+                 (signed long long)(__b), ((__c)*8)),                         \
+           unsigned long long                                                 \
+           : (vector unsigned long long)__builtin_altivec_vinsd(              \
+                 (unsigned long long)(__b), ((__c)*8)),                       \
+           double                                                             \
+           : (vector double)__builtin_altivec_vinsd((double)(__b), ((__c)*8)))
+
+#define vec_replace_unaligned(__a, __b, __c)                                  \
+  _Generic(                                                                   \
+      (__b), signed int                                                       \
+      : (vector unsigned char)__builtin_altivec_vinsw((signed int)(__b),      \
+                                                      (__c)),                 \
+      unsigned int                                                            \
+      : (vector unsigned char)__builtin_altivec_vinsw((unsigned int)(__b),    \
+                                                      (__c)),                 \
+      float                                                                   \
+      : (vector unsigned char)__builtin_altivec_vinsw((float)(__b), (__c)),   \
+      signed long long                                                        \
+      : (vector unsigned char)__builtin_altivec_vinsd((signed long long)(__b),\
+                                                      (__c)),                 \
+      unsigned long long                                                      \
+      : (vector unsigned char)__builtin_altivec_vinsd(                        \
+            (unsigned long long)(__b), (__c)),                                \
+      double                                                                  \
+      : (vector unsigned char)__builtin_altivec_vinsd((double)(__b), (__c)))
+#endif
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3124,6 +3124,10 @@
            SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
   case PPC::BI__builtin_pack_vector_int128:
     return SemaVSXCheck(TheCall);
+  case PPC::BI__builtin_altivec_vinsw:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 12);
+  case PPC::BI__builtin_altivec_vinsd:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 8);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -3,6 +3,14 @@
 // RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
 // RUN:   -o - | FileCheck %s
 
+// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
+// RUN:   -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \
+// RUN:   -o - | FileCheck %s -check-prefix=CHECK-BE
+
+// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
+// RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
+// RUN:   -o - | FileCheck %s -check-prefix=CHECK-LE
+
 #include <altivec.h>
 
 vector signed char vsca;
@@ -10,7 +18,16 @@
 vector unsigned short vusa;
 vector unsigned int vuia;
 vector unsigned long long vulla, vullb;
+vector signed int vsia;
+vector float vfa;
+vector signed long long vslla;
+vector double vda;
 unsigned int uia;
+signed int sia;
+float fa;
+signed long long slla;
+unsigned long long ulla;
+double da;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -79,3 +96,115 @@
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_clrr(vuca, uia);
 }
+
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 0);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-LE-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 0);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-LE-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 0);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -425,6 +425,14 @@
   def int_ppc_altivec_vclrrb : GCCBuiltin<"__builtin_altivec_vclrrb">,
       Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
                 [IntrNoMem]>;
+
+  // P10 Vector Insert with immediate.
+  def int_ppc_altivec_vinsw : GCCBuiltin<"__builtin_altivec_vinsw">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_ppc_altivec_vinsd : GCCBuiltin<"__builtin_altivec_vinsd">,
+      Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 }
 
 // Vector average.
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -206,6 +206,29 @@
   isPCRel;
 }
 
+// VX-Form: [PO VRT / UIM RB XO].
+// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
+// "/ UIM" (unused bit followed by a 4-bit immediate).
+class VX_VRT5_UIM5_RB5<bits<11> xo, string opc, list<dag> pattern>
+  : VXForm_1<xo, (outs vrrc:$VRT), (ins u4imm:$UIM, g8rc:$RB),
+             !strconcat(opc, " $VRT, $RB, $UIM"), IIC_VecGeneral, pattern>;
+
+class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
+                        string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+  bit MP;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = RD;
+  let Inst{11-14} = eo;
+  let Inst{15} = MP;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
 def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">;
 def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">;
 
@@ -552,6 +575,10 @@
                "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
                [(set v16i8:$vD,
                      (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
+  def VINSW : VX_VRT5_UIM5_RB5<207, "vinsw", [(set v4i32:$VRT, (int_ppc_altivec_vinsw
+                                                   i64:$RB, timm:$UIM))]>;
+  def VINSD : VX_VRT5_UIM5_RB5<463, "vinsd", [(set v2i64:$VRT, (int_ppc_altivec_vinsd
+                                                   i64:$RB, timm:$UIM))]>;
 }
 
 //---------------------------- Anonymous Patterns ----------------------------//
Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+
+define <4 x i32> @testVINSW(i64 %a) {
+; CHECK-LABEL: testVINSW:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vinsw v2, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.altivec.vinsw(i64 %a, i32 1)
+  ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.ppc.altivec.vinsw(i64, i32 immarg)
+
+define <2 x i64> @testVINSD(i64 %a) {
+; CHECK-LABEL: testVINSD:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vinsd v2, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <2 x i64> @llvm.ppc.altivec.vinsd(i64 %a, i32 1)
+  ret <2 x i64> %0
+}
+declare <2 x i64> @llvm.ppc.altivec.vinsd(i64, i32 immarg)
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,9 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: vinsw 2, 3, 12
+0x10 0x4c 0x18 0xcf
+
+# CHECK: vinsd 2, 3, 12
+0x10 0x4c 0x19 0xcf
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,9 @@
 # CHECK-BE: vclrrb 1, 4, 3                        # encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3                        # encoding: [0xcd,0x19,0x24,0x10]
             vclrrb 1, 4, 3
+# CHECK-BE: vinsw 2, 3, 12                        # encoding: [0x10,0x4c,0x18,0xcf]
+# CHECK-LE: vinsw 2, 3, 12                        # encoding: [0xcf,0x18,0x4c,0x10]
+            vinsw 2, 3, 12
+# CHECK-BE: vinsd 2, 3, 12                        # encoding: [0x10,0x4c,0x19,0xcf]
+# CHECK-LE: vinsd 2, 3, 12                        # encoding: [0xcf,0x19,0x4c,0x10]
+            vinsd 2, 3, 12
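
For illustration only, not part of the patch: a minimal usage sketch of the two
new user-facing interfaces, assuming a Power10-enabled clang (e.g. -mcpu=pwr10
with VSX and AltiVec). The function names and indices below are hypothetical.

#include <altivec.h>

// Replace word element 1 of v with the scalar x. The element index must be a
// compile-time constant; the new Sema checks reject out-of-range immediates.
vector signed int replace_word(vector signed int v, signed int x) {
  return vec_replace_elt(v, x, 1);
}

// Insert the scalar x at byte offset 2 of v, without regard to element
// alignment. Per the _Generic mapping above, vec_replace_unaligned yields
// vector unsigned char for every scalar input type.
vector unsigned char replace_bytes(vector unsigned char v, unsigned int x) {
  return vec_replace_unaligned(v, x, 2);
}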