diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -646,6 +646,8 @@
 BUILTIN(__builtin_elementwise_abs, "v.", "nct")
 BUILTIN(__builtin_elementwise_max, "v.", "nct")
 BUILTIN(__builtin_elementwise_min, "v.", "nct")
+BUILTIN(__builtin_reduce_max, "v.", "nct")
+BUILTIN(__builtin_reduce_min, "v.", "nct")
 
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12716,6 +12716,7 @@
                                               ExprResult CallResult);
   ExprResult SemaBuiltinElementwiseMathTwoArgs(CallExpr *TheCall,
                                                ExprResult CallResult);
+  ExprResult SemaBuiltinReduceMath(CallExpr *TheCall, ExprResult CallResult);
 
   // Matrix builtin handling.
   ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3145,6 +3145,46 @@
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_reduce_max: {
+    // Integer vectors reduce with smax/umax by signedness; others use fmax.
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      if (IrTy->isIntOrIntVectorTy()) {
+        if (auto *VecTy = QT->getAs<VectorType>())
+          QT = VecTy->getElementType();
+        if (QT->isSignedIntegerType())
+          return llvm::Intrinsic::vector_reduce_smax;
+        else
+          return llvm::Intrinsic::vector_reduce_umax;
+      }
+      return llvm::Intrinsic::vector_reduce_fmax;
+    };
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Result = Builder.CreateUnaryIntrinsic(
+        GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
+        "rdx.max");
+    return RValue::get(Result);
+  }
+
+  case Builtin::BI__builtin_reduce_min: {
+    // Integer vectors reduce with smin/umin by signedness; others use fmin.
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      if (IrTy->isIntOrIntVectorTy()) {
+        if (auto *VecTy = QT->getAs<VectorType>())
+          QT = VecTy->getElementType();
+        if (QT->isSignedIntegerType())
+          return llvm::Intrinsic::vector_reduce_smin;
+        else
+          return llvm::Intrinsic::vector_reduce_umin;
+      }
+      return llvm::Intrinsic::vector_reduce_fmin;
+    };
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Result = Builder.CreateUnaryIntrinsic(
+        GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
+        "rdx.min");
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__builtin_matrix_transpose: {
     const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
     Value *MatValue = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1978,11 +1978,14 @@
 
   case Builtin::BI__builtin_elementwise_abs:
     return SemaBuiltinElementwiseMathOneArg(TheCall, TheCallResult);
-
   case Builtin::BI__builtin_elementwise_min:
   case Builtin::BI__builtin_elementwise_max:
     return SemaBuiltinElementwiseMathTwoArgs(TheCall, TheCallResult);
 
+  case Builtin::BI__builtin_reduce_max:
+  case Builtin::BI__builtin_reduce_min:
+    return SemaBuiltinReduceMath(TheCall, TheCallResult);
+
   case Builtin::BI__builtin_matrix_transpose:
     return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
   case Builtin::BI__builtin_matrix_column_major_load:
@@ -16712,6 +16715,26 @@
   return CallResult;
 }
 
+/// Checks a call to __builtin_reduce_max / __builtin_reduce_min: the call must
+/// have exactly one argument, and that argument must have a vector type. On
+/// success the call's result type is set to the vector's element type.
+ExprResult Sema::SemaBuiltinReduceMath(CallExpr *TheCall,
+                                       ExprResult CallResult) {
+  if (checkArgCount(*this, TheCall, 1))
+    return ExprError();
+
+  Expr *A = TheCall->getArg(0);
+  QualType TyA = A->getType();
+
+  const VectorType *VecTy = TyA->getAs<VectorType>();
+  if (!VecTy)
+    return Diag(A->getBeginLoc(), diag::err_elementwise_math_invalid_arg_type_2)
+           << "vector" << TyA;
+
+  TheCall->setType(VecTy->getElementType());
+  return CallResult;
+}
+
 ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
                                             ExprResult CallResult) {
   if (checkArgCount(*this, TheCall, 1))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -110,3 +110,33 @@
   // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_min(vu1, vu2);
 }
+
+void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_max(
+  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_max(vf1);
+
+  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_max(vi1);
+
+  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_max(vu1);
+}
+
+void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_min(
+  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_min(vf1);
+
+  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_min(vi1);
+
+  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_min(vu1);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -61,3 +61,31 @@
   i = __builtin_elementwise_min(v, iv);
   // expected-error@-1 {{argument types do not match, 'float4' (vector of 4 'float' values) != 'int3' (vector of 3 'int' values)}}
 }
+
+void test_builtin_reduce_max(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_max(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_max(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_max();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_max(i);
+  // expected-error@-1 {{argument must have a vector type, but was 'int'}}
+}
+
+void test_builtin_reduce_min(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_min(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_min(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_min();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_min(i);
+  // expected-error@-1 {{argument must have a vector type, but was 'int'}}
+}