[Clang] Add __builtin_elementwise_max and __builtin_elementwise_min.

Both builtins take exactly two arguments.

Sema (Sema::SemaBuiltinElementwiseMath):
  * applies the usual arithmetic conversions to the two operands;
  * rejects the call if the converted operand types differ;
  * rejects operands that are neither vectors nor valid matrix element
    types (diagnosed as "vector, integer or floating point type");
  * sets the call's type to the common type of the operands.

CodeGen:
  * integer and integer-vector operands are lowered to
    llvm.smax/llvm.umax (resp. llvm.smin/llvm.umin), chosen by the
    signedness of the element type;
  * all other operands go through IRBuilder::CreateMaxNum/CreateMinNum
    (llvm.maxnum/llvm.minnum in the tests).

Test note: the first float check in each CodeGen test function matches the
captured [[F1]]/[[F2]] values instead of the hard-coded %0/%1 names, so the
test no longer depends on how unnamed values are numbered.

NOTE(review): the received copy of this patch had its line breaks collapsed
and its template argument lists dropped. Line structure, indentation of
context lines, and the template arguments (getAs<VectorType>,
getAs<ConstantMatrixType>, is_const<...>) were restored from context --
confirm against the original revision before applying.

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -643,6 +643,9 @@
 BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")
 BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
 
+BUILTIN(__builtin_elementwise_max, "v.", "nct")
+BUILTIN(__builtin_elementwise_min, "v.", "nct")
+
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_store, "v.", "nFt")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11303,6 +11303,9 @@
   "%select{non-pointer|function pointer|void pointer}0 argument to "
   "'__builtin_launder' is not allowed">;
 
+def err_builtin_invalid_arg_type: Error <
+  "%ordinal0 argument must be a %1 (was %2)">;
+
 def err_builtin_matrix_disabled: Error<
   "matrix types extension is disabled. Pass -fenable-matrix to enable it">;
 def err_matrix_index_not_integer: Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12724,6 +12724,8 @@
 
   bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc);
 
+  bool SemaBuiltinElementwiseMath(CallExpr *TheCall);
+
   // Matrix builtin handling.
   ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
                                         ExprResult CallResult);
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3101,6 +3101,39 @@
     return RValue::get(V);
   }
 
+  case Builtin::BI__builtin_elementwise_max: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result;
+    if (Op0->getType()->isIntOrIntVectorTy()) {
+      QualType Ty = E->getArg(0)->getType();
+      if (auto *VecTy = Ty->getAs<VectorType>())
+        Ty = VecTy->getElementType();
+      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+                                                 ? llvm::Intrinsic::smax
+                                                 : llvm::Intrinsic::umax,
+                                             Op0, Op1, nullptr, "elt.max");
+    } else
+      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
+    return RValue::get(Result);
+  }
+  case Builtin::BI__builtin_elementwise_min: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result;
+    if (Op0->getType()->isIntOrIntVectorTy()) {
+      QualType Ty = E->getArg(0)->getType();
+      if (auto *VecTy = Ty->getAs<VectorType>())
+        Ty = VecTy->getElementType();
+      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+                                                 ? llvm::Intrinsic::smin
+                                                 : llvm::Intrinsic::umin,
+                                             Op0, Op1, nullptr, "elt.min");
+    } else
+      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__builtin_matrix_transpose: {
     const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
     Value *MatValue = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1976,6 +1976,11 @@
     break;
   }
 
+  case Builtin::BI__builtin_elementwise_min:
+  case Builtin::BI__builtin_elementwise_max:
+    if (SemaBuiltinElementwiseMath(TheCall))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_matrix_transpose:
     return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
 
@@ -16518,6 +16523,49 @@
                                            _2, _3, _4));
 }
 
+// Check if \p Ty is a valid type for the elementwise math builtins. If it is
+// not a valid type, emit an error message and return true. Otherwise return
+// false.
+static bool checkMathBuiltinElementType(Sema &S, SourceLocation Loc,
+                                        QualType Ty) {
+  if (!Ty->getAs<VectorType>() && !ConstantMatrixType::isValidElementType(Ty)) {
+    S.Diag(Loc, diag::err_builtin_invalid_arg_type)
+        << 1 << "vector, integer or floating point type" << Ty;
+    return true;
+  }
+  return false;
+}
+
+bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) {
+  if (checkArgCount(*this, TheCall, 2))
+    return true;
+
+  ExprResult A = TheCall->getArg(0);
+  ExprResult B = TheCall->getArg(1);
+  // Do standard promotions between the two arguments, returning their common
+  // type.
+  QualType Res =
+      UsualArithmeticConversions(A, B, TheCall->getExprLoc(), ACK_Comparison);
+  if (A.isInvalid() || B.isInvalid())
+    return true;
+
+  QualType TyA = A.get()->getType();
+  QualType TyB = B.get()->getType();
+
+  if (Res.isNull() || TyA.getCanonicalType() != TyB.getCanonicalType())
+    return Diag(A.get()->getBeginLoc(),
+                diag::err_typecheck_call_different_arg_types)
+           << TyA << TyB;
+
+  if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA))
+    return true;
+
+  TheCall->setArg(0, A.get());
+  TheCall->setArg(1, B.get());
+  TheCall->setType(Res);
+  return false;
+}
+
 ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
                                             ExprResult CallResult) {
   if (checkArgCount(*this, TheCall, 1))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef short int si8 __attribute__((ext_vector_type(8)));
+typedef unsigned int u4 __attribute__((ext_vector_type(4)));
+
+__attribute__((address_space(1))) int int_as_one;
+typedef int bar;
+bar b;
+
+void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2, long long int i1,
+                                  long long int i2, si8 vi1, si8 vi2,
+                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_max(
+
+  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
+  // CHECK-NEXT:  call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_max(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_max(d1, d2);
+
+  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
+  d1 = __builtin_elementwise_max(20.0, d2);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_max(vf1, vf2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_max(i1, i2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
+  i1 = __builtin_elementwise_max(i1, 10);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_max(vi1, vi2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_max(u1, u2);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_max(vu1, vu2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_max(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_max(vf2, cvf1);
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_max(int_as_one, b);
+
+  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
+  i1 = __builtin_elementwise_max(1, 'a');
+}
+
+void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2, long long int i1,
+                                  long long int i2, si8 vi1, si8 vi2,
+                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_min(
+  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
+  // CHECK-NEXT:  call float @llvm.minnum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_min(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_min(d1, d2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
+  d1 = __builtin_elementwise_min(d1, 2.0);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_min(vf1, vf2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_min(i1, i2);
+
+  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
+  i1 = __builtin_elementwise_min(-11, i2);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_min(vi1, vi2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_min(u1, u2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
+  u1 = __builtin_elementwise_min(u1, i2);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_min(vu1, vu2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_min(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_min(vf2, cvf1);
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_min(int_as_one, b);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -std=c99 %s -pedantic -verify -triple=x86_64-apple-darwin9
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+
+struct Foo {
+  char *p;
+};
+
+__attribute__((address_space(1))) int int_as_one;
+typedef int bar;
+bar b;
+
+void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, int *p) {
+  i = __builtin_elementwise_max(p, d);
+  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
+
+  struct Foo foo = __builtin_elementwise_max(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_max(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_max();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_max(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_max(v, iv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+  s = __builtin_elementwise_max(i, s);
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_max(one, two);
+
+  enum f { three };
+  enum f x = __builtin_elementwise_max(one, three);
+
+  _ExtInt(32) ext;
+  ext = __builtin_elementwise_max(ext, ext);
+
+  const int ci;
+  i = __builtin_elementwise_max(ci, i);
+  i = __builtin_elementwise_max(i, ci);
+  i = __builtin_elementwise_max(ci, ci);
+
+  i = __builtin_elementwise_max(i, int_as_one); // ok (attributes don't match)?
+  i = __builtin_elementwise_max(i, b);          // ok (sugar doesn't match)?
+
+  int A[10];
+  A = __builtin_elementwise_max(A, A);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
+
+  int(ii);
+  int j;
+  j = __builtin_elementwise_max(i, j);
+
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_max(c1, c2);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
+
+void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, int *p) {
+  i = __builtin_elementwise_min(p, d);
+  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
+
+  struct Foo foo = __builtin_elementwise_min(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_min(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_min();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_min(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_min(v, iv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+  s = __builtin_elementwise_min(i, s);
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_min(one, two);
+
+  enum f { three };
+  enum f x = __builtin_elementwise_min(one, three);
+
+  _ExtInt(32) ext;
+  ext = __builtin_elementwise_min(ext, ext);
+
+  const int ci;
+  i = __builtin_elementwise_min(ci, i);
+  i = __builtin_elementwise_min(i, ci);
+  i = __builtin_elementwise_min(ci, ci);
+
+  i = __builtin_elementwise_min(i, int_as_one); // ok (attributes don't match)?
+  i = __builtin_elementwise_min(i, b);          // ok (sugar doesn't match)?
+
+  int A[10];
+  A = __builtin_elementwise_min(A, A);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
+
+  int(ii);
+  int j;
+  j = __builtin_elementwise_min(i, j);
+
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_min(c1, c2);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 %s -std=c++17 -pedantic -verify -triple=x86_64-apple-darwin9
+
+// Simple is_const implementation.
+struct true_type {
+  static const bool value = true;
+};
+
+struct false_type {
+  static const bool value = false;
+};
+
+template <class T> struct is_const : false_type {};
+template <class T> struct is_const<const T> : true_type {};
+
+// expected-no-diagnostics
+
+void test_builtin_elementwise_max() {
+  const int a = 2;
+  int b = 1;
+  static_assert(!is_const<decltype(__builtin_elementwise_max(a, b))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_max(b, a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_max(a, a))>::value);
+}
+
+void test_builtin_elementwise_min() {
+  const int a = 2;
+  int b = 1;
+  static_assert(!is_const<decltype(__builtin_elementwise_min(a, b))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_min(b, a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_min(a, a))>::value);
+}