diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -335,6 +335,8 @@
 BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsULLiV8Us", "")
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiULLiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiULLiV4Ui", "")
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
 
 // VSX built-ins.
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14268,6 +14268,104 @@
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
     return Builder.CreateCall(F, {X, Undef});
   }
+  case PPC::BI__builtin_altivec_vec_replace_elt: {
+    // Lowers vec_replace_elt(vector, scalar, index). The third argument must
+    // be a compile time constant; it is scaled to a byte offset and emitted as
+    // the immediate of the vinsw (32-bit scalar) or vinsd (64-bit scalar)
+    // intrinsic.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+    int64_t ConstArg = ArgCI->getSExtValue();
+    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+    assert((ArgWidth == 32 || ArgWidth == 64) &&
+           "Second Arg to vinsw/vinsd intrinsic must be 32 or 64 bits wide!");
+    if (ArgWidth == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      // Convert the element index into a byte offset.
+      ConstArg *= 4;
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+        Value *Call = Builder.CreateCall(F, Ops);
+        return Builder.CreateBitCast(Call, ResultType);
+      }
+    } else if (ArgWidth == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Convert the element index into a byte offset.
+      ConstArg *= 8;
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+        Value *Call = Builder.CreateCall(F, Ops);
+        return Builder.CreateBitCast(Call,
+                                     llvm::FixedVectorType::get(DoubleTy, 2));
+      }
+    }
+    return Builder.CreateCall(F, Ops);
+  }
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // Lowers vec_replace_unaligned(vector, scalar, offset). The third
+    // argument must be a compile time constant; it is used unscaled (only
+    // adjusted for endianness) as the immediate of the vinsw (32-bit scalar)
+    // or vinsd (64-bit scalar) intrinsic.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+    int64_t ConstArg = ArgCI->getSExtValue();
+    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+    assert((ArgWidth == 32 || ArgWidth == 64) &&
+           "Second Arg to vinsw/vinsd intrinsic must be 32 or 64 bits wide!");
+    if (ArgWidth == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      // Fix the constant if we are on little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+      }
+    } else if (ArgWidth == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant if we are on little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+      }
+    }
+    // Emit the call, and bitcast the result to a vector of char.
+    Value *Call = Builder.CreateCall(F, Ops);
+    return Builder.CreateBitCast(Call, llvm::FixedVectorType::get(Int8Ty, 16));
+  }
   case PPC::BI__builtin_altivec_vpopcntb:
   case PPC::BI__builtin_altivec_vpopcnth:
   case PPC::BI__builtin_altivec_vpopcntw:
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17095,6 +17095,14 @@
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
 /* vec_splati */
 
 #define vec_splati(__a)                                                        \
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -24,10 +24,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
+signed int sia;
 unsigned int uia, uib;
 unsigned char uca;
 unsigned short usa;
+signed long long slla;
 unsigned long long ulla;
+float fa;
+double da;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -593,3 +597,123 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK: bitcast float %{{.+}} to i32
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK: bitcast double %{{.+}} to i64
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: bitcast float %{{.+}} to i32
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: bitcast double %{{.+}} to i64
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}