P10 vector insert-with-immediate support (vinsw / vinsd).

Summary (text before the first "Index:" header is skipped by patch tools):
  * BuiltinsPPC.def: declares __builtin_altivec_vinsw / __builtin_altivec_vinsd.
  * altivec.h: adds vec_replace_elt and vec_replace_unaligned as _Generic
    macros that dispatch on the type of the scalar argument.
  * IntrinsicsPowerPC.td / PPCInstrPrefix.td: adds the two intrinsics and
    attaches selection patterns to the VINSW / VINSD instruction definitions.
  * Tests: Clang IR checks for both endiannesses and an llc assembly check.

Review notes:
  * The float/double bit-reinterpretation helpers in altivec.h are named
    __F2L / __DP2LL (reserved namespace) and are fully parenthesized, so a
    system header does not define plain F2L / DP2LL macros in user code.
  * The test's "#include <altivec.h>" operand and the "ImmArg<ArgIndex<2>>"
    intrinsic property were restored; the immediate is the third operand,
    matching the "i32 immarg" declarations in the .ll test.
  * Leading whitespace of context lines, and of the whitespace-only
    re-indentation of VINSBVLX / VINSBVRX, is reconstructed and must be
    confirmed against the tree before applying.
  * NOTE(review): _Generic and a union defined inside a cast are C
    constructs; confirm how C++ translation units including altivec.h are
    expected to use these macros.

Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -336,6 +336,10 @@
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiULLiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiULLiV4Ui", "")
 
+// P10 Vector insert with immediate built-ins.
+BUILTIN(__builtin_altivec_vinsw, "V4UiV4UiULLiIi", "")
+BUILTIN(__builtin_altivec_vinsd, "V2ULLiV2ULLiULLiIi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17027,6 +17027,115 @@
 #endif
 }
 
+/* vec_replace_elt */
+
+/* Reads the bits of a float back as an unsigned int through a union cast. */
+#define __F2L(x) \
+  (((union { \
+      float f; \
+      unsigned int ui; \
+    })(x)) \
+       .ui)
+/* Reads the bits of a double back as an unsigned long long (union cast). */
+#define __DP2LL(x) \
+  (((union { \
+      double d; \
+      unsigned long long ull; \
+    })(x)) \
+       .ull)
+
+
+/* __c is an element index. It is scaled by 4 (vinsw) or 8 (vinsd) to form
+ * the immediate; on little-endian targets the scaled value is subtracted
+ * from 12 (vinsw) or 8 (vinsd) instead of being passed through directly. */
+#ifdef __LITTLE_ENDIAN__
+#define vec_replace_elt(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector signed int)__builtin_altivec_vinsw( \
+               __a, (signed int)(__b), (12 - ((__c)*4))), \
+             unsigned int \
+           : (vector unsigned int)__builtin_altivec_vinsw( \
+               __a, (unsigned int)(__b), (12 - ((__c)*4))), \
+             float \
+           : (vector float)__builtin_altivec_vinsw(__a, __F2L((float)(__b)), \
+                                                   (12 - ((__c)*4))), \
+             signed long long \
+           : (vector signed long long)__builtin_altivec_vinsd( \
+               __a, (signed long long)(__b), (8 - ((__c)*8))), \
+             unsigned long long \
+           : (vector unsigned long long)__builtin_altivec_vinsd( \
+               __a, (unsigned long long)(__b), (8 - ((__c)*8))), \
+             double \
+           : (vector double)__builtin_altivec_vinsd(__a, __DP2LL((double)(__b)), \
+                                                    (8 - ((__c)*8))))
+#else
+#define vec_replace_elt(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector signed int)__builtin_altivec_vinsw( \
+               __a, (signed int)(__b), ((__c)*4)), \
+             unsigned int \
+           : (vector unsigned int)__builtin_altivec_vinsw( \
+               __a, (unsigned int)(__b), ((__c)*4)), \
+             float \
+           : (vector float)__builtin_altivec_vinsw(__a, __F2L((float)(__b)), \
+                                                   ((__c)*4)), \
+             signed long long \
+           : (vector signed long long)__builtin_altivec_vinsd( \
+               __a, (signed long long)(__b), ((__c)*8)), \
+             unsigned long long \
+           : (vector unsigned long long)__builtin_altivec_vinsd( \
+               __a, (unsigned long long)(__b), ((__c)*8)), \
+             double \
+           : (vector double)__builtin_altivec_vinsd(__a, __DP2LL((double)(__b)), \
+                                                    ((__c)*8)))
+#endif
+
+/* vec_replace_unaligned */
+
+/* __c is passed unscaled; on little-endian targets it is subtracted from 12
+ * (vinsw) or 8 (vinsd). Every result is cast to vector unsigned char. */
+#ifdef __LITTLE_ENDIAN__
+#define vec_replace_unaligned(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, (signed int)(__b), (12 - (__c))), \
+             unsigned int \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, (unsigned int)(__b), (12 - (__c))), \
+             float \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, __F2L((float)(__b)), (12 - (__c))), \
+             signed long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, (signed long long)(__b), (8 - (__c))), \
+             unsigned long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, (unsigned long long)(__b), (8 - (__c))), \
+             double \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, __DP2LL((double)(__b)), (8 - (__c))))
+#else
+#define vec_replace_unaligned(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, (signed int)(__b), (__c)), \
+             unsigned int \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, (unsigned int)(__b), (__c)), \
+             float \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+               __a, __F2L((float)(__b)), (__c)), \
+             signed long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, (signed long long)(__b), (__c)), \
+             unsigned long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, (unsigned long long)(__b), (__c)), \
+             double \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+               __a, __DP2LL((double)(__b)), (__c)))
+#endif
+
 #ifdef __VSX__
 
 /* vec_permx */
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -1,16 +1,12 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
 // RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
-// RUN:   -o - | FileCheck %s
+// RUN:   -o - | FileCheck %s -check-prefixes=CHECK,CHECK-LE
 
 // RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
 // RUN:   -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \
 // RUN:   -o - | FileCheck %s -check-prefix=CHECK-BE
 
-// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
-// RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
-// RUN:   -o - | FileCheck %s -check-prefix=CHECK-LE
-
 #include <altivec.h>
 
 vector signed char vsca, vscb;
@@ -28,6 +24,10 @@
 unsigned char uca;
 unsigned short usa;
 unsigned long long ulla;
+signed int sia;
+signed long long slla;
+float fa;
+double da;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -512,3 +512,115 @@
   // CHECK-LE-NEXT: ret <4 x i32>
   return vec_inserth(vuia, vuib, uia);
 }
+
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 12
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-LE-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-LE-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -522,6 +522,16 @@
               Intrinsic<[llvm_v4i32_ty],
                         [llvm_v4i32_ty, llvm_i64_ty, llvm_v4i32_ty],
                         [IntrNoMem]>;
+
+  // P10 Vector Insert with immediate.
+  def int_ppc_altivec_vinsw : GCCBuiltin<"__builtin_altivec_vinsw">,
+              Intrinsic<[llvm_v4i32_ty],
+                        [llvm_v4i32_ty, llvm_i64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_ppc_altivec_vinsd : GCCBuiltin<"__builtin_altivec_vinsd">,
+              Intrinsic<[llvm_v2i64_ty],
+                        [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 // Vector average.
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -775,18 +775,26 @@
                                       (int_ppc_altivec_vsrdbi v16i8:$VRA,
                                                               v16i8:$VRB,
                                                               i32:$SH))]>;
-  def VINSW : VXForm_VRT5_UIM5_RB5_ins<207, "vinsw", []>;
-  def VINSD : VXForm_VRT5_UIM5_RB5_ins<463, "vinsd", []>;
+  def VINSW :
+    VXForm_VRT5_UIM5_RB5_ins<207, "vinsw",
+                             [(set v4i32:$vD,
+                                   (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB,
+                                                          timm:$UIM))]>;
+  def VINSD :
+    VXForm_VRT5_UIM5_RB5_ins<463, "vinsd",
+                             [(set v2i64:$vD,
+                                   (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB,
+                                                          timm:$UIM))]>;
   def VINSBVLX :
     VXForm_VTB5_RA5_ins<15, "vinsbvlx",
-                         [(set v16i8:$vD,
-                          (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
-                           v16i8:$vB))]>;
+                        [(set v16i8:$vD,
+                              (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
+                                                        v16i8:$vB))]>;
   def VINSBVRX :
     VXForm_VTB5_RA5_ins<271, "vinsbvrx",
-                         [(set v16i8:$vD,
-                          (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
-                           v16i8:$vB))]>;
+                        [(set v16i8:$vD,
+                              (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
+                                                        v16i8:$vB))]>;
   def VINSHVLX :
     VXForm_VTB5_RA5_ins<79, "vinshvlx",
                         [(set v8i16:$vD,
Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
@@ -231,3 +231,25 @@
   ret <4 x i32> %0
 }
 declare <4 x i32> @llvm.ppc.altivec.vinswvrx(<4 x i32>, i64, <4 x i32>)
+
+define <4 x i32> @testVINSW(<4 x i32> %a, i64 %b) {
+; CHECK-LABEL: testVINSW:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vinsw v2, r5, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32> %a, i64 %b, i32 1)
+  ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.ppc.altivec.vinsw(<4 x i32>, i64, i32 immarg)
+
+define <2 x i64> @testVINSD(<2 x i64> %a, i64 %b) {
+; CHECK-LABEL: testVINSD:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vinsd v2, r5, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <2 x i64> @llvm.ppc.altivec.vinsd(<2 x i64> %a, i64 %b, i32 1)
+  ret <2 x i64> %0
+}
+declare <2 x i64> @llvm.ppc.altivec.vinsd(<2 x i64>, i64, i32 immarg)