diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -577,6 +577,8 @@ BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn") BUILTIN(__builtin_call_with_static_chain, "v.", "nt") +BUILTIN(__builtin_matrix_transpose, "v.", "nFt") + // "Overloaded" Atomic operator builtins. These are overloaded to support data // types of i8, i16, i32, i64, and i128. The front-end sees calls to the // non-suffixed version of these (which has a bogus type) and transforms them to diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10768,6 +10768,9 @@ def err_matrix_subscript_comma: Error< "comma expressions are not allowed as indices in matrix subscript expressions">; +def err_builtin_matrix_arg: Error< + "%select{first|second}0 argument must be a matrix">; + def err_preserve_field_info_not_field : Error< "__builtin_preserve_field_info argument %0 not a field access">; def err_preserve_field_info_not_const: Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12108,6 +12108,11 @@ int ArgNum, unsigned ExpectedFieldNum, bool AllowName); bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall); + + // Matrix builtin handling. 
+ ExprResult SemaBuiltinMatrixTransposeOverload(CallExpr *TheCall, + ExprResult CallResult); + public: enum FormatStringType { FST_Scanf, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MatrixBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/TargetParser.h" @@ -1636,6 +1637,10 @@ return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } +const ConstantMatrixType *getMatrixTy(QualType Ty) { + return cast<ConstantMatrixType>(Ty.getCanonicalType()); +}; + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -2373,6 +2378,15 @@ return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_matrix_transpose: { + const ConstantMatrixType *MatrixTy = getMatrixTy(E->getArg(0)->getType()); + Value *MatValue = EmitScalarExpr(E->getArg(0)); + MatrixBuilder<CGBuilderTy> MB(Builder); + Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(), + MatrixTy->getNumColumns()); + return RValue::get(Result); + } + case Builtin::BIfinite: case Builtin::BI__finite: case Builtin::BIfinitef: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1896,7 +1896,7 @@ return ExprError(); break; case Builtin::BI__builtin_frame_address: - case Builtin::BI__builtin_return_address: + case Builtin::BI__builtin_return_address: { if (SemaBuiltinConstantArgRange(TheCall, 0, 0, 0xFFFF)) return ExprError(); @@ -1913,6 +1913,20 @@ break; } + case Builtin::BI__builtin_matrix_transpose: + if (!getLangOpts().MatrixTypes) { + Diag(TheCall->getBeginLoc(), 
+ diag::err_builtin_matrix_disabled); + return ExprError(); + } + + switch (BuiltinID) { + case Builtin::BI__builtin_matrix_transpose: + return SemaBuiltinMatrixTransposeOverload(TheCall, TheCallResult); + default: + llvm_unreachable("All matrix builtins should be handled here!"); + } + } + // Since the target specific builtins for each arch overlap, only check those // of the arch we are compiling for. if (Context.BuiltinInfo.isTSBuiltin(BuiltinID)) { @@ -15028,3 +15042,26 @@ rhs, std::bind(&Sema::AddPotentialMisalignedMembers, std::ref(*this), _1, _2, _3, _4)); } + +ExprResult Sema::SemaBuiltinMatrixTransposeOverload(CallExpr *TheCall, + ExprResult CallResult) { + if (checkArgCount(*this, TheCall, 1)) + return ExprError(); + + Expr *Arg = TheCall->getArg(0); + if (!Arg->getType()->isConstantMatrixType()) { + Diag(Arg->getBeginLoc(), diag::err_builtin_matrix_arg) << 0; + return ExprError(); + } + + // Create returned matrix type by swapping rows and columns of the argument + // matrix type. + ConstantMatrixType const *MType = + cast<ConstantMatrixType>(Arg->getType().getCanonicalType()); + QualType ResultType = Context.getConstantMatrixType( + MType->getElementType(), MType->getNumColumns(), MType->getNumRows()); + + // Change the return type to the type of the returned matrix. + TheCall->setType(ResultType); + return CallResult; +} diff --git a/clang/test/CodeGen/matrix-type-builtins.c b/clang/test/CodeGen/matrix-type-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/matrix-type-builtins.c @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// Tests for the matrix type builtins. 
+ +typedef double dx5x5_t __attribute__((matrix_type(5, 5))); +typedef float fx2x3_t __attribute__((matrix_type(2, 3))); +typedef float fx3x2_t __attribute__((matrix_type(3, 2))); +typedef int ix20x4_t __attribute__((matrix_type(20, 4))); +typedef int ix4x20_t __attribute__((matrix_type(4, 20))); +typedef unsigned ux1x6_t __attribute__((matrix_type(1, 6))); +typedef unsigned ux6x1_t __attribute__((matrix_type(6, 1))); + +void transpose_double_5x5(dx5x5_t *a) { + // CHECK-LABEL: define void @transpose_double_5x5( + // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 + // CHECK-NEXT: [[TRANS:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[A]], i32 5, i32 5) + // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [25 x double]* %a_t to <25 x double>* + // CHECK-NEXT: store <25 x double> [[TRANS]], <25 x double>* [[AT_ADDR]], align 8 + dx5x5_t a_t = __builtin_matrix_transpose(*a); +} + +void transpose_float_3x2(fx3x2_t *a) { + // CHECK-LABEL: define void @transpose_float_3x2( + // CHECK: [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4 + // CHECK-NEXT: [[TRANS:%.*]] = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> [[A]], i32 3, i32 2) + // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [6 x float]* %a_t to <6 x float>* + // CHECK-NEXT: store <6 x float> [[TRANS]], <6 x float>* [[AT_ADDR]], align 4 + + fx2x3_t a_t = __builtin_matrix_transpose(*a); +} + +void transpose_int_20x4(ix20x4_t *a) { + // CHECK-LABEL: define void @transpose_int_20x4( + // CHECK: [[A:%.*]] = load <80 x i32>, <80 x i32>* {{.*}}, align 4 + // CHECK-NEXT: [[TRANS:%.*]] = call <80 x i32> @llvm.matrix.transpose.v80i32(<80 x i32> [[A]], i32 20, i32 4) + // CHECK-NEXT: [[AT_ADDR:%.*]] = bitcast [80 x i32]* %a_t to <80 x i32>* + // CHECK-NEXT: store <80 x i32> [[TRANS]], <80 x i32>* [[AT_ADDR]], align 4 + + ix4x20_t a_t = __builtin_matrix_transpose(*a); +} + +struct Foo { + ux1x6_t in; + ux6x1_t out; +}; + +void transpose_struct_member(struct Foo *F) { + // 
CHECK-LABEL: define void @transpose_struct_member( + // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4 + // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6) + // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8 + // CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 1 + // CHECK-NEXT: [[OUT_PTR_C:%.*]] = bitcast [6 x i32]* [[OUT_PTR]] to <6 x i32>* + // CHECK-NEXT: store <6 x i32> [[M_T]], <6 x i32>* [[OUT_PTR_C]], align 4 + + F->out = __builtin_matrix_transpose(F->in); +} + +void transpose_transpose_struct_member(struct Foo *F) { + // CHECK-LABEL: define void @transpose_transpose_struct_member( + // CHECK: [[M:%.*]] = load <6 x i32>, <6 x i32>* {{.*}}, align 4 + // CHECK-NEXT: [[M_T:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M]], i32 1, i32 6) + // CHECK-NEXT: [[M_T2:%.*]] = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> [[M_T]], i32 6, i32 1) + // CHECK-NEXT: [[F_ADDR:%.*]] = load %struct.Foo*, %struct.Foo** %F.addr, align 8 + // CHECK-NEXT: [[IN_PTR:%.*]] = getelementptr inbounds %struct.Foo, %struct.Foo* [[F_ADDR]], i32 0, i32 0 + // CHECK-NEXT: [[IN_PTR_C:%.*]] = bitcast [6 x i32]* [[IN_PTR]] to <6 x i32>* + // CHECK-NEXT: store <6 x i32> [[M_T2]], <6 x i32>* [[IN_PTR_C]], align 4 + + F->in = __builtin_matrix_transpose(__builtin_matrix_transpose(F->in)); +} diff --git a/clang/test/CodeGenCXX/matrix-type-builtins.cpp b/clang/test/CodeGenCXX/matrix-type-builtins.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++17 | FileCheck %s + +// Tests for the matrix type builtins. 
+ +template <typename EltTy, unsigned Rows, unsigned Columns> +using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); + +template <typename EltTy, unsigned Rows, unsigned Columns> +struct MyMatrix { + matrix_t<EltTy, Rows, Columns> value; +}; + +template <typename EltTy, unsigned Rows, unsigned Columns> +MyMatrix<EltTy, Columns, Rows> transpose(const MyMatrix<EltTy, Rows, Columns> &M) { + MyMatrix<EltTy, Columns, Rows> Res; + Res.value = __builtin_matrix_transpose(M.value); + return Res; +} + +void test_transpose_template1() { + // CHECK-LABEL: define void @_Z24test_transpose_template1v() + // CHECK: call void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.0* sret align 4 %M1_t, %struct.MyMatrix* nonnull align 4 dereferenceable(160) %M1) + + // CHECK-LABEL: define linkonce_odr void @_Z9transposeIiLj4ELj10EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <40 x i32>, <40 x i32>* {{.*}}, align 4 + // CHECK-NEXT: [[M_T:%.*]] = call <40 x i32> @llvm.matrix.transpose.v40i32(<40 x i32> [[M]], i32 4, i32 10) + + MyMatrix<int, 4, 10> M1; + MyMatrix<int, 10, 4> M1_t = transpose(M1); +} + +void test_transpose_template2(MyMatrix<double, 7, 6> &M) { + // CHECK-LABEL: define void @_Z24test_transpose_template2R8MyMatrixIdLj7ELj6EE( + // CHECK: call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret align 8 %ref.tmp1, %struct.MyMatrix.1* nonnull align 8 dereferenceable(336) %0) + // CHECK-NEXT: call void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.1* sret align 8 %ref.tmp, %struct.MyMatrix.2* nonnull align 8 dereferenceable(336) %ref.tmp1) + // CHECK-NEXT: call void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE(%struct.MyMatrix.2* sret align 8 %M2_t, %struct.MyMatrix.1* nonnull align 8 dereferenceable(336) %ref.tmp) + + // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj7ELj6EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8 + // CHECK-NEXT: [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 7, i32 6) + // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr 
inbounds %struct.MyMatrix.2, %struct.MyMatrix.2* %agg.result, i32 0, i32 0 + // CHECK-NEXT: [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>* + // CHECK-NEXT: store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8 + + // CHECK-LABEL: define linkonce_odr void @_Z9transposeIdLj6ELj7EE8MyMatrixIT_XT1_EXT0_EERKS0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <42 x double>, <42 x double>* {{.*}}, align 8 + // CHECK-NEXT: [[M_T:%.*]] = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> [[M]], i32 6, i32 7) + // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0 + // CHECK-NEXT: [[RES_ADDR_C:%.*]] = bitcast [42 x double]* [[RES_ADDR]] to <42 x double>* + // CHECK-NEXT: store <42 x double> [[M_T]], <42 x double>* [[RES_ADDR_C]], align 8 + + MyMatrix<double, 6, 7> M2_t = transpose(transpose(transpose(M))); +} diff --git a/clang/test/Sema/matrix-type-builtins.c b/clang/test/Sema/matrix-type-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/matrix-type-builtins.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -fenable-matrix -pedantic -verify -triple=x86_64-apple-darwin9 + +typedef float sx5x10_t __attribute__((matrix_type(5, 10))); +typedef int ix3x2_t __attribute__((matrix_type(3, 2))); + +void transpose(sx5x10_t a, ix3x2_t b, int *c, int d) { + a = __builtin_matrix_transpose(b); + // expected-error@-1 {{assigning to 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') from incompatible type 'int __attribute__((matrix_type(2, 3)))'}} + b = __builtin_matrix_transpose(b); + // expected-error@-1 {{assigning to 'ix3x2_t' (aka 'int __attribute__((matrix_type(3, 2)))') from incompatible type 'int __attribute__((matrix_type(2, 3)))'}} + __builtin_matrix_transpose(c); + // expected-error@-1 {{first argument must be a matrix}} + __builtin_matrix_transpose(d); + // expected-error@-1 {{first argument must be a matrix}} + __builtin_matrix_transpose("test"); + // 
expected-error@-1 {{first argument must be a matrix}} +} diff --git a/clang/test/SemaCXX/matrix-type-builtins.cpp b/clang/test/SemaCXX/matrix-type-builtins.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaCXX/matrix-type-builtins.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 %s -fenable-matrix -pedantic -std=c++11 -verify -triple=x86_64-apple-darwin9 + +template +struct MyMatrix { + using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); + + matrix_t value; +}; + +template +typename MyMatrix::matrix_t transpose(MyMatrix &A) { + char *v1 = __builtin_matrix_transpose(A.value); + // expected-error@-1 {{cannot initialize a variable of type 'char *' with an rvalue of type 'unsigned int __attribute__((matrix_type(3, 2)))'}} + // expected-error@-2 {{cannot initialize a variable of type 'char *' with an rvalue of type 'unsigned int __attribute__((matrix_type(3, 3)))'}} + + __builtin_matrix_transpose(A); + // expected-error@-1 {{first argument must be a matrix}} + // expected-error@-2 {{first argument must be a matrix}} + + return __builtin_matrix_transpose(A.value); + // expected-error@-1 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 3)))') with an rvalue of type 'unsigned int __attribute__((matrix_type(3, 2)))'}} + // expected-error@-2 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 3)))') with an rvalue of type 'unsigned int __attribute__((matrix_type(3, 3)))'}} +} + +void test_transpose_template(unsigned *Ptr1, float *Ptr2) { + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); + Mat1.value = transpose(Mat1); + // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} + + Mat1.value = transpose(Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} +}