diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3475,6 +3475,11 @@ NumElements <= ConstantMatrixTypeBitfields::MaxElementsPerDimension; } + /// Returns the maximum number of elements per dimension. + static unsigned getMaxElementsPerDimension() { + return ConstantMatrixTypeBitfields::MaxElementsPerDimension; + } + void Profile(llvm::FoldingSetNodeID &ID) { Profile(ID, getElementType(), getNumRows(), getNumColumns(), getTypeClass()); diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -576,6 +576,7 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") +BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") // "Overloaded" Atomic operator builtins. These are overloaded to support data // types of i8, i16, i32, i64, and i128. The front-end sees calls to the diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10757,6 +10757,18 @@ def err_builtin_matrix_arg: Error< "%select{first|second}0 argument must be a matrix">; +def err_builtin_matrix_scalar_int_arg: Error< + "%select{row|column|stride}0 argument must be %select{an unsigned integer|a constant unsigned integer expression}1">; + +def err_builtin_matrix_pointer_arg: Error< + "%select{first|second}0 argument must be a pointer to a valid matrix element type">; + +def err_builtin_matrix_stride_too_small: Error< + "stride must be greater or equal to the number of rows">; + +def err_builtin_matrix_invalid_dimension: Error< + "%select{row|column}0 dimension is outside the allowed range [1, %1]">; + def err_preserve_field_info_not_field : Error< "__builtin_preserve_field_info argument %0 not a field access">; def err_preserve_field_info_not_const: Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12086,6 +12086,8 @@ // Matrix builtin handling. ExprResult SemaBuiltinMatrixTransposeOverload(CallExpr *TheCall, ExprResult CallResult); + ExprResult SemaBuiltinMatrixColumnMajorLoadOverload(CallExpr *TheCall, + ExprResult CallResult); public: enum FormatStringType { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2376,6 +2376,39 @@ return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_matrix_column_major_load: { + MatrixBuilder MB(Builder); + // Emit everything that isn't dependent on the first parameter type + Value *Stride = EmitScalarExpr(E->getArg(3)); + const ConstantMatrixType *ResultTy = getMatrixTy(E->getType()); + + // If it's an address we need to emit the pointer + // otherwise, emit the array + Value *Result = nullptr; + if (const PointerType *PTy = + dyn_cast(E->getArg(0)->getType())) { + Address Src = EmitPointerWithAlignment(E->getArg(0)); + EmitNonNullArgCheck(RValue::get(Src.getPointer()), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), + FD, 0); + Result = MB.CreateMatrixColumnwiseLoad( + Src.getPointer(), ResultTy->getNumRows(), ResultTy->getNumColumns(), + Stride, "matrix"); + } else if (const ArrayType *ATy = + dyn_cast(E->getArg(0)->getType())) { + Address Src = EmitArrayToPointerDecay(E->getArg(0)); + EmitNonNullArgCheck(RValue::get(Src.getPointer()), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), + FD, 0); + Result = MB.CreateMatrixColumnwiseLoad( + Src.getPointer(), ResultTy->getNumRows(), ResultTy->getNumColumns(), + Stride, "matrix"); + } else { + llvm_unreachable( + "CGBuiltin.cpp: First argument must either be a pointer or an array"); + } + return RValue::get(Result); + } case Builtin::BI__builtin_matrix_transpose: { const ConstantMatrixType *MatrixTy = getMatrixTy(E->getArg(0)->getType()); Value *MatValue = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1913,6 +1913,7 @@ } case Builtin::BI__builtin_matrix_transpose: + case Builtin::BI__builtin_matrix_column_major_load: if (!getLangOpts().MatrixTypes) { Diag(TheCall->getBeginLoc(), diag::err_builtin_matrix_disabled); return ExprError(); @@ -1921,6 +1922,8 @@ switch (BuiltinID) { case Builtin::BI__builtin_matrix_transpose: return SemaBuiltinMatrixTransposeOverload(TheCall, TheCallResult); + case Builtin::BI__builtin_matrix_column_major_load: + return SemaBuiltinMatrixColumnMajorLoadOverload(TheCall, TheCallResult); default: llvm_unreachable("All matrix builtins should be handled here!"); } @@ -14814,3 +14817,82 @@ TheCall->setType(ResultType); return CallResult; } + +// Get and verify the matrix dimensions. +static llvm::Optional +getAndVerifyMatrixDimension(Expr *Expr, unsigned ErrIdx, Sema &S) { + llvm::APSInt Value(64); + SourceLocation ErrorPos; + if (!Expr->isIntegerConstantExpr(Value, S.Context, &ErrorPos)) { + S.Diag(Expr->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg) + << ErrIdx << 1; + return {}; + } + uint64_t Dim = Value.getZExtValue(); + if (!ConstantMatrixType::isDimensionValid(Dim)) { + S.Diag(Expr->getBeginLoc(), diag::err_builtin_matrix_invalid_dimension) + << ErrIdx << ConstantMatrixType::getMaxElementsPerDimension(); + return {}; + } + return Dim; +} + +ExprResult +Sema::SemaBuiltinMatrixColumnMajorLoadOverload(CallExpr *TheCall, + ExprResult CallResult) { + if (checkArgCount(*this, TheCall, 4)) + return ExprError(); + + Expr *DataExpr = TheCall->getArg(0); + Expr *StrideExpr = TheCall->getArg(3); + bool ArgError = false; + + // Check data pointer. + QualType ElementType; + if (!(DataExpr->getType()->isPointerType() || + DataExpr->getType()->isArrayType())) { + Diag(DataExpr->getBeginLoc(), diag::err_builtin_matrix_pointer_arg) << 0; + ArgError = true; + } else { + if (const PointerType *PTy = dyn_cast(DataExpr->getType())) + ElementType = PTy->getPointeeType(); + else if (const ArrayType *ATy = dyn_cast(DataExpr->getType())) + ElementType = ATy->getElementType(); + else + llvm_unreachable("Pointer Expression must be a pointer or an array"); + ElementType.removeLocalConst(); + if (!ConstantMatrixType::isValidElementType(ElementType)) { + Diag(DataExpr->getBeginLoc(), diag::err_builtin_matrix_pointer_arg) << 0; + ArgError = true; + } + } + + // Check rows and columns arguments. + auto MaybeRows = getAndVerifyMatrixDimension(TheCall->getArg(1), 0, *this); + auto MaybeCols = getAndVerifyMatrixDimension(TheCall->getArg(2), 1, *this); + + // Check stride. + if (!StrideExpr->getType()->isIntegralType(Context)) { + Diag(StrideExpr->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg) + << 2 << 1; + ArgError = true; + } else { + llvm::APSInt Value(64); + if (StrideExpr->isIntegerConstantExpr(Value, Context)) { + uint64_t Stride = Value.getZExtValue(); + if (MaybeRows && Stride < *MaybeRows) { + Diag(StrideExpr->getBeginLoc(), + diag::err_builtin_matrix_stride_too_small); + ArgError = true; + } + } + } + + if (ArgError || !MaybeRows || !MaybeCols) + return ExprError(); + + QualType ReturnType = + Context.getConstantMatrixType(ElementType, *MaybeRows, *MaybeCols); + TheCall->setType(ReturnType); + return CallResult; +} diff --git a/clang/test/CodeGen/matrix-type-builtins.c b/clang/test/CodeGen/matrix-type-builtins.c --- a/clang/test/CodeGen/matrix-type-builtins.c +++ b/clang/test/CodeGen/matrix-type-builtins.c @@ -72,3 +72,42 @@ } // CHECK: declare <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32>, i32 immarg, i32 immarg) + +void column_major_load1(double *a, float *b, int *c) { + // CHECK-LABEL: define void @column_major_load1(double* %a, float* %b, i32* %c) #0 { + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca double*, align 8 + // CHECK-NEXT: %b.addr = alloca float*, align 8 + // CHECK-NEXT: %c.addr = alloca i32*, align 8 + // CHECK-NEXT: %m_a1 = alloca [25 x double], align 8 + // CHECK-NEXT: %m_a2 = alloca [25 x double], align 8 + // CHECK-NEXT: %m_b = alloca [6 x float], align 4 + // CHECK-NEXT: %m_c = alloca [80 x i32], align 4 + // CHECK-NEXT: store double* %a, double** %a.addr, align 8 + // CHECK-NEXT: store float* %b, float** %b.addr, align 8 + // CHECK-NEXT: store i32* %c, i32** %c.addr, align 8 + // CHECK-NEXT: %0 = load double*, double** %a.addr, align 8 + // CHECK-NEXT: %matrix = call <25 x double> @llvm.matrix.columnwise.load.v25f64.p0f64(double* %0, i32 5, i32 5, i32 5) + // CHECK-NEXT: %1 = bitcast [25 x double]* %m_a1 to <25 x double>* + // CHECK-NEXT: store <25 x double> %matrix, <25 x double>* %1, align 8 + dx5x5_t m_a1 = __builtin_matrix_column_major_load(a, 5, 5, 5); + + // CHECK-NEXT: %2 = load double*, double** %a.addr, align 8 + // CHECK-NEXT: %matrix1 = call <25 x double> @llvm.matrix.columnwise.load.v25f64.p0f64(double* %2, i32 9, i32 5, i32 5) + // CHECK-NEXT: %3 = bitcast [25 x double]* %m_a2 to <25 x double>* + // CHECK-NEXT: store <25 x double> %matrix1, <25 x double>* %3, align 8 + dx5x5_t m_a2 = __builtin_matrix_column_major_load(a, 5, 5, 9); + + // CHECK-NEXT: %4 = load float*, float** %b.addr, align 8 + // CHECK-NEXT: %matrix2 = call <6 x float> @llvm.matrix.columnwise.load.v6f32.p0f32(float* %4, i32 3, i32 2, i32 3) + // CHECK-NEXT: %5 = bitcast [6 x float]* %m_b to <6 x float>* + // CHECK-NEXT: store <6 x float> %matrix2, <6 x float>* %5, align 4 + fx2x3_t m_b = __builtin_matrix_column_major_load(b, 2, 3, 3); + + // CHECK-NEXT: %6 = load i32*, i32** %c.addr, align 8 + // CHECK-NEXT: %matrix3 = call <80 x i32> @llvm.matrix.columnwise.load.v80i32.p0i32(i32* %6, i32 4, i32 4, i32 20) + // CHECK-NEXT: %7 = bitcast [80 x i32]* %m_c to <80 x i32>* + // CHECK-NEXT: store <80 x i32> %matrix3, <80 x i32>* %7, align 4 + // CHECK-NEXT: ret void + ix4x20_t m_c = __builtin_matrix_column_major_load(c, 4, 20, 4); +} diff --git a/clang/test/CodeGenCXX/matrix-type-builtins.cpp b/clang/test/CodeGenCXX/matrix-type-builtins.cpp --- a/clang/test/CodeGenCXX/matrix-type-builtins.cpp +++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp @@ -122,3 +122,92 @@ // CHECK-NEXT: %1 = load <42 x double>, <42 x double>* %0, align 8 // CHECK-NEXT: %2 = call <42 x double> @llvm.matrix.transpose.v42f64(<42 x double> %1, i32 6, i32 7) // CHECK-NEXT: ret <42 x double> %2 + +void column_major_load(float *Ptr1, unsigned *Ptr2) { + // CHECK-LABEL: define void @_Z17column_major_loadPfPj(float* %Ptr1, i32* %Ptr2) #0 { + // CHECK-NEXT: entry: + // CHECK-NEXT: %Ptr1.addr = alloca float*, align 8 + // CHECK-NEXT: %Ptr2.addr = alloca i32*, align 8 + // CHECK-NEXT: %M1 = alloca [28 x float], align 4 + // CHECK-NEXT: %M2 = alloca [105 x float], align 4 + // CHECK-NEXT: %M3 = alloca [80 x i32], align 4 + // CHECK-NEXT: store float* %Ptr1, float** %Ptr1.addr, align 8 + // CHECK-NEXT: store i32* %Ptr2, i32** %Ptr2.addr, align 8 + // CHECK-NEXT: %0 = load float*, float** %Ptr1.addr, align 8 + // CHECK-NEXT: %matrix = call <28 x float> @llvm.matrix.columnwise.load.v28f32.p0f32(float* %0, i32 5, i32 4, i32 7) + // CHECK-NEXT: %1 = bitcast [28 x float]* %M1 to <28 x float>* + // CHECK-NEXT: store <28 x float> %matrix, <28 x float>* %1, align 4 + matrix_t M1 = __builtin_matrix_column_major_load(Ptr1, 4, 7, 5); + + // CHECK-NEXT: %2 = load float*, float** %Ptr1.addr, align 8 + // CHECK-NEXT: %matrix1 = call <105 x float> @llvm.matrix.columnwise.load.v105f32.p0f32(float* %2, i32 15, i32 15, i32 7) + // CHECK-NEXT: %3 = bitcast [105 x float]* %M2 to <105 x float>* + // CHECK-NEXT: store <105 x float> %matrix1, <105 x float>* %3, align 4 + matrix_t M2 = __builtin_matrix_column_major_load(Ptr1, 15, 7, 15); + + // CHECK-NEXT: %4 = load i32*, i32** %Ptr2.addr, align 8 + // CHECK-NEXT: %matrix2 = call <80 x i32> @llvm.matrix.columnwise.load.v80i32.p0i32(i32* %4, i32 32, i32 16, i32 5) + // CHECK-NEXT: %5 = bitcast [80 x i32]* %M3 to <80 x i32>* + // CHECK-NEXT: store <80 x i32> %matrix2, <80 x i32>* %5, align 4 + // CHECK-NEXT: ret void + auto M3 = __builtin_matrix_column_major_load(Ptr2, 16, 5, 32); +} + +template +matrix_t column_major_load(T *Ptr) { + return __builtin_matrix_column_major_load(Ptr, R, C, R); +} + +void test_column_major_load_template(int *Ptr1, double *Ptr2) { + // CHECK-LABEL: define void @_Z31test_column_major_load_templatePiPd(i32* %Ptr1, double* %Ptr2) #5 { + // CHECK-NEXT: entry: + // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8 + // CHECK-NEXT: %Ptr2.addr = alloca double*, align 8 + // CHECK-NEXT: %M1 = alloca [40 x i32], align 4 + // CHECK-NEXT: %M2 = alloca [63 x i32], align 4 + // CHECK-NEXT: %M3 = alloca [63 x double], align 8 + // CHECK-NEXT: store i32* %Ptr1, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: store double* %Ptr2, double** %Ptr2.addr, align 8 + // CHECK-NEXT: %0 = load i32*, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: %call = call <40 x i32> @_Z17column_major_loadIiLj10ELj4EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %0) + // CHECK-NEXT: %1 = bitcast [40 x i32]* %M1 to <40 x i32>* + // CHECK-NEXT: store <40 x i32> %call, <40 x i32>* %1, align 4 + matrix_t M1 = column_major_load(Ptr1); + + // CHECK-NEXT: %2 = load i32*, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: %call1 = call <63 x i32> @_Z17column_major_loadIiLj7ELj9EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %2) + // CHECK-NEXT: %3 = bitcast [63 x i32]* %M2 to <63 x i32>* + // CHECK-NEXT: store <63 x i32> %call1, <63 x i32>* %3, align 4 + matrix_t M2 = column_major_load(Ptr1); + + // CHECK-NEXT: %4 = load double*, double** %Ptr2.addr, align 8 + // CHECK-NEXT: %call2 = call <63 x double> @_Z17column_major_loadIdLj7ELj9EEU11matrix_typeXT0_EXT1_ET_PS0_(double* %4) + // CHECK-NEXT: %5 = bitcast [63 x double]* %M3 to <63 x double>* + // CHECK-NEXT: store <63 x double> %call2, <63 x double>* %5, align 8 + // CHECK-NEXT: ret void + matrix_t M3 = column_major_load(Ptr2); +} + +// CHECK-LABEL: define linkonce_odr <40 x i32> @_Z17column_major_loadIiLj10ELj4EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %Ptr) +// CHECK-NEXT: entry: +// CHECK-NEXT: %Ptr.addr = alloca i32*, align 8 +// CHECK-NEXT: store i32* %Ptr, i32** %Ptr.addr, align 8 +// CHECK-NEXT: %0 = load i32*, i32** %Ptr.addr, align 8 +// CHECK-NEXT: %matrix = call <40 x i32> @llvm.matrix.columnwise.load.v40i32.p0i32(i32* %0, i32 10, i32 10, i32 4) +// CHECK-NEXT: ret <40 x i32> %matrix + +// CHECK-LABEL: define linkonce_odr <63 x i32> @_Z17column_major_loadIiLj7ELj9EEU11matrix_typeXT0_EXT1_ET_PS0_(i32* %Ptr) +// CHECK-NEXT: entry: +// CHECK-NEXT: %Ptr.addr = alloca i32*, align 8 +// CHECK-NEXT: store i32* %Ptr, i32** %Ptr.addr, align 8 +// CHECK-NEXT: %0 = load i32*, i32** %Ptr.addr, align 8 +// CHECK-NEXT: %matrix = call <63 x i32> @llvm.matrix.columnwise.load.v63i32.p0i32(i32* %0, i32 7, i32 7, i32 9) +// CHECK-NEXT: ret <63 x i32> %matrix + +// CHECK-LABEL: define linkonce_odr <63 x double> @_Z17column_major_loadIdLj7ELj9EEU11matrix_typeXT0_EXT1_ET_PS0_(double* %Ptr) +// CHECK-NEXT: entry: +// CHECK-NEXT: %Ptr.addr = alloca double*, align 8 +// CHECK-NEXT: store double* %Ptr, double** %Ptr.addr, align 8 +// CHECK-NEXT: %0 = load double*, double** %Ptr.addr, align 8 +// CHECK-NEXT: %matrix = call <63 x double> @llvm.matrix.columnwise.load.v63f64.p0f64(double* %0, i32 7, i32 7, i32 9) +// CHECK-NEXT: ret <63 x double> %matrix diff --git a/clang/test/Sema/matrix-type-builtins.c b/clang/test/Sema/matrix-type-builtins.c --- a/clang/test/Sema/matrix-type-builtins.c +++ b/clang/test/Sema/matrix-type-builtins.c @@ -15,3 +15,48 @@ __builtin_matrix_transpose("test"); // expected-error@-1 {{first argument must be a matrix}} } + +struct Foo { + unsigned x; +}; + +void column_major_load(float *p1, int *p2, _Bool *p3, struct Foo *p4) { + sx5x10_t a1 = __builtin_matrix_column_major_load(p1, 5, 11, 5); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'float __attribute__((matrix_type(5, 11)))'}} + sx5x10_t a2 = __builtin_matrix_column_major_load(p1, 5, 9, 5); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'float __attribute__((matrix_type(5, 9)))'}} + sx5x10_t a3 = __builtin_matrix_column_major_load(p1, 6, 10, 6); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'float __attribute__((matrix_type(6, 10)))'}} + sx5x10_t a4 = __builtin_matrix_column_major_load(p1, 4, 10, 4); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'float __attribute__((matrix_type(4, 10)))'}} + sx5x10_t a5 = __builtin_matrix_column_major_load(p1, 6, 9, 6); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'float __attribute__((matrix_type(6, 9)))'}} + sx5x10_t a6 = __builtin_matrix_column_major_load(p2, 5, 10, 6); + // expected-error@-1 {{initializing 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') with an expression of incompatible type 'int __attribute__((matrix_type(5, 10)))'}} + + sx5x10_t a7 = __builtin_matrix_column_major_load(p1, 5, 10, 3); + // expected-error@-1 {{stride must be greater or equal to the number of rows}} + + sx5x10_t a8 = __builtin_matrix_column_major_load(p3, 5, 10, 6); + // expected-error@-1 {{first argument must be a pointer to a valid matrix element type}} + + sx5x10_t a9 = __builtin_matrix_column_major_load(p4, 5, 10, 6); + // expected-error@-1 {{first argument must be a pointer to a valid matrix element type}} + + sx5x10_t a10 = __builtin_matrix_column_major_load(p1, 1ull << 21, 10, 6); + // expected-error@-1 {{row dimension is outside the allowed range [1, 1048575}} + sx5x10_t a11 = __builtin_matrix_column_major_load(p1, 10, 1ull << 21, 10); + // expected-error@-1 {{column dimension is outside the allowed range [1, 1048575}} + + sx5x10_t a12 = __builtin_matrix_column_major_load( + 10, // expected-error {{first argument must be a pointer to a valid matrix element type}} + 1ull << 21, // expected-error {{row dimension is outside the allowed range [1, 1048575]}} + 1ull << 21, // expected-error {{column dimension is outside the allowed range [1, 1048575]}} + ""); // expected-error {{stride argument must be a constant unsigned integer expression}} + + sx5x10_t a13 = __builtin_matrix_column_major_load( + 10, // expected-error {{first argument must be a pointer to a valid matrix element type}} + "", // expected-error {{row argument must be a constant unsigned integer expression}} + "", // expected-error {{column argument must be a constant unsigned integer expression}} + 10); +} diff --git a/clang/test/SemaCXX/matrix-type-builtins.cpp b/clang/test/SemaCXX/matrix-type-builtins.cpp --- a/clang/test/SemaCXX/matrix-type-builtins.cpp +++ b/clang/test/SemaCXX/matrix-type-builtins.cpp @@ -32,3 +32,27 @@ Mat1.value = transpose(Mat2); // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} } + +template +typename MyMatrix::matrix_t column_major_load(MyMatrix &A, EltTy0 *Ptr) { + char *v1 = __builtin_matrix_column_major_load(Ptr, 9, 4, 10); + // expected-error@-1 {{cannot initialize a variable of type 'char *' with an rvalue of type 'unsigned int __attribute__((matrix_type(9, 4)))'}} + // expected-error@-2 {{cannot initialize a variable of type 'char *' with an rvalue of type 'unsigned int __attribute__((matrix_type(9, 4)))'}} + // expected-error@-3 {{cannot initialize a variable of type 'char *' with an rvalue of type 'float __attribute__((matrix_type(9, 4)))'}} + + return __builtin_matrix_column_major_load(Ptr, R0, C0, R0); + // expected-error@-1 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(5, 5)))') with an rvalue of type 'unsigned int __attribute__((matrix_type(2, 3)))'}} + // expected-error@-2 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 3)))') with an rvalue of type 'float __attribute__((matrix_type(2, 3)))'}} +} + +void test_column_major_loads_template(unsigned *Ptr1, float *Ptr2) { + MyMatrix Mat1; + Mat1.value = column_major_load(Mat1, Ptr1); + // expected-note@-1 {{in instantiation of function template specialization 'column_major_load' requested here}} + column_major_load(Mat1, Ptr1); + // expected-note@-1 {{in instantiation of function template specialization 'column_major_load' requested here}} + + MyMatrix Mat2; + Mat1.value = column_major_load(Mat2, Ptr2); + // expected-note@-1 {{in instantiation of function template specialization 'column_major_load' requested here}} +}