Index: include/clang/Basic/BuiltinsPPC.def =================================================================== --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") +BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "") +BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,11 @@ using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t value, int64_t low, int64_t high) { + return std::min(high, std::max(low, value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8168,6 +8173,67 @@ llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + case PPC::BI__builtin_vsx_insertword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + + // Third argument is a compile time constant int. It must be clamped to + // to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast(Ops[2]); + assert(ArgCI); + int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + + // Need to cast the second argument from a vector of cahr to a vector + // of long long. + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + if(getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2]; + ShuffleElts[0] = ConstantInt::get(Int32Ty, 1); + ShuffleElts[1] = ConstantInt::get(Int32Ty, 0); + Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + // Reverse the double words in the second argument. + Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], ShuffleMask); + + // reverse the index + index = 12 - index; + } + + Ops[2] = ConstantInt::getSigned(Int32Ty, index); + return Builder.CreateCall(F, Ops); + } + case PPC::BI__builtin_vsx_extractuword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + + // The second argument is a compile time constant int that needs to + // be clamped to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast(Ops[1]); + assert(ArgCI); + int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + + if(getTarget().isLittleEndian()) { + // Reverse the index + index = 12 - index; + Ops[1] = ConstantInt::getSigned(Int32Ty, index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops); + + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2]; + ShuffleElts[0] = ConstantInt::get(Int32Ty, 1); + ShuffleElts[1] = ConstantInt::get(Int32Ty, 0); + Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value* ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + return ShuffleCall; + } else { + Ops[1] = ConstantInt::getSigned(Int32Ty, index); + return Builder.CreateCall(F, Ops); + } + } } } Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -12456,6 +12456,9 @@ #ifdef __POWER9_VECTOR__ +#define vec_insert4b __builtin_vsx_insertword +#define vec_extract4b __builtin_vsx_extractuword + /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai Index: test/CodeGen/builtins-ppc-p9vector.c =================================================================== --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -1180,3 +1180,27 @@ // CHECK-LE-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); } +vector unsigned char test116(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 5) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vuia, vuca, 7); +} +vector unsigned char test117(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 5) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 7) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vsia, vuca, 5); +} +vector unsigned long long test118(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK-LE: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1) +// CHECK-LE-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> +// CHECK-LE-NEXT: ret <2 x i64> + return vec_extract4b(vuca, 11); +}