diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -573,6 +573,8 @@
 BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")
 BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
 
+BUILTIN(__builtin_matrix_insert, "v.", "nt")
+
 // "Overloaded" Atomic operator builtins. These are overloaded to support data
 // types of i8, i16, i32, i64, and i128. The front-end sees calls to the
 // non-suffixed version of these (which has a bogus type) and transforms them to
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10243,6 +10243,15 @@
 def err_builtin_matrix_disabled: Error<
   "Builtin matrix support is disabled. Pass -fenable-matrix to enable it.">;
 
+def err_builtin_matrix_arg: Error<
+  "%select{First|Second}0 argument must be a matrix">;
+
+def err_builtin_matrix_scalar_int_arg: Error<
+  "%select{Row|Column|Offset|Stride}0 argument must be %select{an unsigned integer|a constant unsigned integer expression}1">;
+
+def err_builtin_matrix_implicit_cast_error: Error<
+  "Implicit cast from %0 to %1 failed">;
+
 def err_preserve_field_info_not_field : Error<
   "__builtin_preserve_field_info argument %0 not a field access">;
 def err_preserve_field_info_not_const: Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11562,6 +11562,11 @@
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);
   bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
+
+  // Matrix Builtin intrinsic handling.
+  ExprResult SemaBuiltinMatrixInsertOverload(CallExpr *TheCall,
+                                             ExprResult CallResult);
+
 public:
   enum FormatStringType {
     FST_Scanf,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/TargetParser.h"
@@ -1600,6 +1601,12 @@
   return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
 }
 
+/// Return the canonical matrix type underlying \p Ty, looking through any
+/// typedefs. \p Ty must be a matrix type.
+static const clang::MatrixType *getMatrixTy(QualType Ty) {
+  return cast<clang::MatrixType>(Ty.getCanonicalType());
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -2336,6 +2343,22 @@
     V = Builder.CreateFCmpUNO(V, V, "cmp");
     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
   }
+  case Builtin::BI__builtin_matrix_insert: {
+    MatrixBuilder<CGBuilderTy> MB(Builder);
+    Value *MatValue = EmitScalarExpr(E->getArg(0));
+    const MatrixType *MatrixTy = getMatrixTy(E->getArg(0)->getType());
+    Value *RowValue = EmitScalarExpr(E->getArg(1));
+    Value *ColValue = EmitScalarExpr(E->getArg(2));
+    Value *ValValue = EmitScalarExpr(E->getArg(3));
+    // Sema is responsible for converting the inserted value to the matrix
+    // element type, so by now the IR types must agree.
+    assert(ValValue->getType() ==
+               cast<llvm::VectorType>(MatValue->getType())->getElementType() &&
+           "Inserted type must match matrix data type");
+
+    return RValue::get(MB.CreateMatrixInsert(MatValue, ValValue, RowValue,
+                                             ColValue, MatrixTy->getNumRows()));
+  }
 
   case Builtin::BIfinite:
   case Builtin::BI__finite:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1522,6 +1522,19 @@
     if (SemaBuiltinOSLogFormat(TheCall))
       return ExprError();
     break;
+
+  case Builtin::BI__builtin_matrix_insert:
+    if (!getLangOpts().EnableMatrix) {
+      Diag(TheCall->getBeginLoc(), diag::err_builtin_matrix_disabled);
+      return ExprError();
+    }
+
+    switch (BuiltinID) {
+    case Builtin::BI__builtin_matrix_insert:
+      return SemaBuiltinMatrixInsertOverload(TheCall, TheCallResult);
+    default:
+      llvm_unreachable("All matrix builtins should be handled here!");
+    }
   }
 
   // Since the target specific builtins for each arch overlap, only check those
@@ -14955,3 +14968,121 @@
       rhs, std::bind(&Sema::AddPotentialMisalignedMembers, std::ref(*this), _1,
                      _2, _3, _4));
 }
+
+/// Type-check a call to __builtin_matrix_insert and give it a result type.
+///
+/// The builtin is declared as taking arbitrary arguments and returning
+/// nothing, so every argument has to be validated here. A well-formed call
+/// passes, in order: the matrix to update, the row index, the column index,
+/// and the value to store (of, or convertible to, the matrix element type).
+///
+/// \returns the checked call, whose type is set to the matrix type, or
+/// ExprError() if an argument is invalid.
+ExprResult Sema::SemaBuiltinMatrixInsertOverload(CallExpr *TheCall,
+                                                 ExprResult CallResult) {
+  // Expected arguments, in order:
+  //   Matrix -- the matrix to insert into
+  //   Row    -- unsigned integer row index
+  //   Column -- unsigned integer column index
+  //   Value  -- arithmetic value; converted to the matrix element type
+  //
+  // Result: the matrix with the value inserted at (Row, Column).
+
+  // The work is split in two parts: first the arguments are validated and
+  // converted, then the callee and the result type of the call are rewritten
+  // to describe this particular use of the builtin.
+
+  // This builtin takes exactly four arguments.
+  if (checkArgCount(*this, TheCall, 4))
+    return ExprError();
+
+  // Look up the builtin's declaration; the callee is rebuilt from it below.
+  Expr *Callee = TheCall->getCallee();
+  DeclRefExpr *DRE = cast<DeclRefExpr>(Callee->IgnoreParenCasts());
+  FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
+
+  Expr *MatArg = TheCall->getArg(0);
+  Expr *RowArg = TheCall->getArg(1);
+  Expr *ColArg = TheCall->getArg(2);
+  Expr *ValArg = TheCall->getArg(3);
+
+  // Diagnose every malformed matrix/index argument before bailing out, so
+  // that all of the problems are reported at once.
+  bool ArgError = false;
+  if (!MatArg->getType()->isMatrixType()) {
+    Diag(MatArg->getBeginLoc(), diag::err_builtin_matrix_arg) << 0;
+    ArgError = true;
+  }
+  if (!RowArg->getType()->isUnsignedIntegerType()) {
+    Diag(RowArg->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg)
+        << 0 << 0;
+    ArgError = true;
+  }
+  // The column index is validated against its own type, not the row's.
+  if (!ColArg->getType()->isUnsignedIntegerType()) {
+    Diag(ColArg->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg)
+        << 1 << 0;
+    ArgError = true;
+  }
+  if (ArgError)
+    return ExprError();
+
+  // FIXME: Constant row/column indices are not yet checked against the
+  // dimensions of the matrix.
+
+  // The inserted value must either have the matrix element type or be
+  // implicitly convertible to it; only arithmetic values qualify.
+  const auto *MatTy = cast<MatrixType>(MatArg->getType().getCanonicalType());
+  QualType EltTy = MatTy->getElementType();
+  QualType ValTy = ValArg->getType();
+  if (!ValTy->isArithmeticType()) {
+    // Report the source type first, then the destination type.
+    Diag(ValArg->getBeginLoc(), diag::err_builtin_matrix_implicit_cast_error)
+        << ValTy << EltTy;
+    return ExprError();
+  }
+
+  // This builtin uses custom type checking, so no argument conversions have
+  // been applied yet. Now that the argument types are known to be valid, turn
+  // any l-value argument into an r-value.
+  for (unsigned I = 0, E = TheCall->getNumArgs(); I != E; ++I) {
+    ExprResult Converted = DefaultLvalueConversion(TheCall->getArg(I));
+    if (Converted.isInvalid())
+      return ExprError();
+    TheCall->setArg(I, Converted.get());
+  }
+
+  // Convert the value to the element type. The cast kind is derived from the
+  // actual source and destination types (integral-to-floating, floating
+  // truncation, ...) instead of being assumed.
+  ExprResult ValRes = TheCall->getArg(3);
+  if (!Context.hasSameUnqualifiedType(ValRes.get()->getType(), EltTy)) {
+    CastKind CK = PrepareScalarCast(ValRes, EltTy);
+    ValRes = ImpCastExprToType(ValRes.get(), EltTy, CK);
+    if (ValRes.isInvalid())
+      return ExprError();
+    TheCall->setArg(3, ValRes.get());
+  }
+
+  // Create a new DeclRefExpr to refer to the builtin's declaration.
+  DeclRefExpr *NewDRE = DeclRefExpr::Create(
+      Context, DRE->getQualifierLoc(), SourceLocation(), FDecl,
+      /*enclosing*/ false, DRE->getLocation(), Context.BuiltinFnTy,
+      DRE->getValueKind(), nullptr, nullptr, DRE->isNonOdrUse());
+
+  // Set the callee in the CallExpr.
+  // FIXME: This loses syntactic information.
+  QualType CalleePtrTy = Context.getPointerType(FDecl->getType());
+  ExprResult PromotedCall =
+      ImpCastExprToType(NewDRE, CalleePtrTy, CK_BuiltinFnToFnPtr);
+  if (PromotedCall.isInvalid())
+    return ExprError();
+  TheCall->setCallee(PromotedCall.get());
+
+  // The call yields the updated matrix, so its type is that of the converted
+  // (r-value) matrix argument. Codegen for this builtin is designed to handle
+  // that.
+  TheCall->setType(TheCall->getArg(0)->getType());
+
+  return CallResult;
+}
diff --git a/clang/test/CodeGen/builtin-matrix.c b/clang/test/CodeGen/builtin-matrix.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/builtin-matrix.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
+
+// Check that we can use __builtin_matrix_insert on matrices with different
+// floating-point element types.
+void insert_fp(dx5x5_t a, double d, fx2x3_t b, float e) {
+  // CHECK-LABEL: @insert_fp(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [25 x double], align 8
+  // CHECK-NEXT: %d.addr = alloca double, align 8
+  // CHECK-NEXT: %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT: %e.addr = alloca float, align 4
+  // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>*
+  // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8
+  // CHECK-NEXT: store double %d, double* %d.addr, align 8
+  // CHECK-NEXT: %1 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT: store <6 x float> %b, <6 x float>* %1, align 4
+  // CHECK-NEXT: store float %e, float* %e.addr, align 4
+  // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT: %3 = load double, double* %d.addr, align 8
+  // CHECK-NEXT: %4 = insertelement <25 x double> %2, double %3, i32 5
+  // CHECK-NEXT: store <25 x double> %4, <25 x double>* %0, align 8
+  // CHECK-NEXT: %5 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT: %6 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %7 = insertelement <6 x float> %5, float %6, i32 1
+  // CHECK-NEXT: store <6 x float> %7, <6 x float>* %1, align 4
+  // CHECK-NEXT: %8 = load <25 x double>, <25 x double>* %0, align 8
+  // CHECK-NEXT: %9 = load double, double* %d.addr, align 8
+  // CHECK-NEXT: %10 = insertelement <25 x double> %8, double %9, i32 1
+  // CHECK-NEXT: store <25 x double> %10, <25 x double>* %0, align 8
+  // CHECK-NEXT: %11 = load <6 x float>, <6 x float>* %1, align 4
+  // CHECK-NEXT: %12 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %13 = insertelement <6 x float> %11, float %12, i32 3
+  // CHECK-NEXT: store <6 x float> %13, <6 x float>* %1, align 4
+  // CHECK-NEXT: ret void
+
+  a = __builtin_matrix_insert(a, 0u, 1u, d);
+  b = __builtin_matrix_insert(b, 1u, 0u, e);
+  a = __builtin_matrix_insert(a, 1u, 0u, d);
+  b = __builtin_matrix_insert(b, 1u, 1u, e);
+}
+
+// Check that we can use __builtin_matrix_insert on integer matrices.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int(ix9x3_t a, int i) {
+  // CHECK-LABEL: @insert_int(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [27 x i32], align 4
+  // CHECK-NEXT: %i.addr = alloca i32, align 4
+  // CHECK-NEXT: %0 = bitcast [27 x i32]* %a.addr to <27 x i32>*
+  // CHECK-NEXT: store <27 x i32> %a, <27 x i32>* %0, align 4
+  // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT: %1 = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT: %2 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT: %3 = insertelement <27 x i32> %1, i32 %2, i32 13
+  // CHECK-NEXT: store <27 x i32> %3, <27 x i32>* %0, align 4
+  // CHECK-NEXT: ret void
+
+  a = __builtin_matrix_insert(a, 4u, 1u, i);
+}
+
+// Check that we can use __builtin_matrix_insert on FP and integer matrices
+// in the same function.
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+void insert_int_fp(ix9x3_t *a, int i, fx2x3_t b, float e) {
+  // CHECK-LABEL: @insert_int_fp(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT: %i.addr = alloca i32, align 4
+  // CHECK-NEXT: %b.addr = alloca [6 x float], align 4
+  // CHECK-NEXT: %e.addr = alloca float, align 4
+  // CHECK-NEXT: store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT: %0 = bitcast [6 x float]* %b.addr to <6 x float>*
+  // CHECK-NEXT: store <6 x float> %b, <6 x float>* %0, align 4
+  // CHECK-NEXT: store float %e, float* %e.addr, align 4
+  // CHECK-NEXT: %1 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: %2 = bitcast [27 x i32]* %1 to <27 x i32>*
+  // CHECK-NEXT: %3 = load <27 x i32>, <27 x i32>* %2, align 4
+  // CHECK-NEXT: %4 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT: %5 = insertelement <27 x i32> %3, i32 %4, i32 13
+  // CHECK-NEXT: %6 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: %7 = bitcast [27 x i32]* %6 to <27 x i32>*
+  // CHECK-NEXT: store <27 x i32> %5, <27 x i32>* %7, align 4
+  // CHECK-NEXT: %8 = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT: %9 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %10 = insertelement <6 x float> %8, float %9, i32 3
+  // CHECK-NEXT: store <6 x float> %10, <6 x float>* %0, align 4
+  // CHECK-NEXT: ret void
+
+  *a = __builtin_matrix_insert(*a, 4u, 1u, i);
+  b = __builtin_matrix_insert(b, 1u, 1u, e);
+}
+
+// Check that we can use overloaded versions of __builtin_matrix_insert on
+// matrices with matching dimensions, but different element types.
+typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
+typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
+void insert_matching_dimensions(dx3x3_t a, double i, fx3x3_t b, float e) {
+  // CHECK-LABEL: @insert_matching_dimensions(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [9 x double], align 8
+  // CHECK-NEXT: %i.addr = alloca double, align 8
+  // CHECK-NEXT: %b.addr = alloca [9 x float], align 4
+  // CHECK-NEXT: %e.addr = alloca float, align 4
+  // CHECK-NEXT: %0 = bitcast [9 x double]* %a.addr to <9 x double>*
+  // CHECK-NEXT: store <9 x double> %a, <9 x double>* %0, align 8
+  // CHECK-NEXT: store double %i, double* %i.addr, align 8
+  // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>*
+  // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4
+  // CHECK-NEXT: store float %e, float* %e.addr, align 4
+  // CHECK-NEXT: %2 = load <9 x double>, <9 x double>* %0, align 8
+  // CHECK-NEXT: %3 = load double, double* %i.addr, align 8
+  // CHECK-NEXT: %4 = insertelement <9 x double> %2, double %3, i32 5
+  // CHECK-NEXT: store <9 x double> %4, <9 x double>* %0, align 8
+  // CHECK-NEXT: %5 = load <9 x float>, <9 x float>* %1, align 4
+  // CHECK-NEXT: %6 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %7 = insertelement <9 x float> %5, float %6, i32 7
+  // CHECK-NEXT: store <9 x float> %7, <9 x float>* %1, align 4
+  // CHECK-NEXT: ret void
+
+  a = __builtin_matrix_insert(a, 2u, 1u, i);
+  b = __builtin_matrix_insert(b, 1u, 2u, e);
+}
diff --git a/clang/test/CodeGenCXX/builtin-matrix.cpp b/clang/test/CodeGenCXX/builtin-matrix.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-matrix.cpp
@@ -0,0 +1,150 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
+
+typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
+using fx2x3_t = float __attribute__((matrix_type(2, 3)));
+
+void insert_fp(dx5x5_t *a, double d, fx2x3_t *b, float e) {
+  *a = __builtin_matrix_insert(*a, 0u, 1u, d);
+  *b = __builtin_matrix_insert(*b, 1u, 0u, e);
+
+  // CHECK-LABEL: @_Z9insert_fpPDm5_5_ddPDm2_3_ff(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [25 x double]*, align 8
+  // CHECK-NEXT: %d.addr = alloca double, align 8
+  // CHECK-NEXT: %b.addr = alloca [6 x float]*, align 8
+  // CHECK-NEXT: %e.addr = alloca float, align 4
+  // CHECK-NEXT: store [25 x double]* %a, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT: store double %d, double* %d.addr, align 8
+  // CHECK-NEXT: store [6 x float]* %b, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT: store float %e, float* %e.addr, align 4
+  // CHECK-NEXT: %0 = load [25 x double]*, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT: %1 = bitcast [25 x double]* %0 to <25 x double>*
+  // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %1, align 8
+  // CHECK-NEXT: %3 = load double, double* %d.addr, align 8
+  // CHECK-NEXT: %4 = insertelement <25 x double> %2, double %3, i32 5
+  // CHECK-NEXT: %5 = load [25 x double]*, [25 x double]** %a.addr, align 8
+  // CHECK-NEXT: %6 = bitcast [25 x double]* %5 to <25 x double>*
+  // CHECK-NEXT: store <25 x double> %4, <25 x double>* %6, align 8
+  // CHECK-NEXT: %7 = load [6 x float]*, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT: %8 = bitcast [6 x float]* %7 to <6 x float>*
+  // CHECK-NEXT: %9 = load <6 x float>, <6 x float>* %8, align 4
+  // CHECK-NEXT: %10 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %11 = insertelement <6 x float> %9, float %10, i32 1
+  // CHECK-NEXT: %12 = load [6 x float]*, [6 x float]** %b.addr, align 8
+  // CHECK-NEXT: %13 = bitcast [6 x float]* %12 to <6 x float>*
+  // CHECK-NEXT: store <6 x float> %11, <6 x float>* %13, align 4
+  // CHECK-NEXT: ret void
+}
+
+typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
+
+void insert_int(ix9x3_t *a, int i) {
+  *a = __builtin_matrix_insert(*a, 4u, 1u, i);
+
+  // CHECK-LABEL: @_Z10insert_intPDm9_3_ii(
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %a.addr = alloca [27 x i32]*, align 8
+  // CHECK-NEXT: %i.addr = alloca i32, align 4
+  // CHECK-NEXT: store [27 x i32]* %a, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4
+  // CHECK-NEXT: %0 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: %1 = bitcast [27 x i32]* %0 to <27 x i32>*
+  // CHECK-NEXT: %2 = load <27 x i32>, <27 x i32>* %1, align 4
+  // CHECK-NEXT: %3 = load i32, i32* %i.addr, align 4
+  // CHECK-NEXT: %4 = insertelement <27 x i32> %2, i32 %3, i32 13
+  // CHECK-NEXT: %5 = load [27 x i32]*, [27 x i32]** %a.addr, align 8
+  // CHECK-NEXT: %6 = bitcast [27 x i32]* %5 to <27 x i32>*
+  // CHECK-NEXT: store <27 x i32> %4, <27 x i32>* %6, align 4
+  // CHECK-NEXT: ret void
+}
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+struct MyMatrix {
+  using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
+
+  matrix_t value;
+};
+
+template <typename EltTy, unsigned Rows, unsigned Columns>
+void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e) {
+  Mat.value = __builtin_matrix_insert(Mat.value, 1u, 0u, e);
+}
+
+void test_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) {
+
+  // CHECK-LABEL: define void @_Z13test_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2)
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8
+  // CHECK-NEXT: %E1.addr = alloca i32, align 4
+  // CHECK-NEXT: %Ptr2.addr = alloca float*, align 8
+  // CHECK-NEXT: %E2.addr = alloca float, align 4
+  // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix, align 4
+  // CHECK-NEXT: %Mat2 = alloca %struct.MyMatrix.0, align 4
+  // CHECK-NEXT: store i32* %Ptr1, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT: store i32 %E1, i32* %E1.addr, align 4
+  // CHECK-NEXT: store float* %Ptr2, float** %Ptr2.addr, align 8
+  // CHECK-NEXT: store float %E2, float* %E2.addr, align 4
+  // CHECK-NEXT: %0 = load i32*, i32** %Ptr1.addr, align 8
+  // CHECK-NEXT: %1 = bitcast i32* %0 to [4 x i32]*
+  // CHECK-NEXT: %2 = bitcast [4 x i32]* %1 to <4 x i32>*
+  // CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0
+  // CHECK-NEXT: %4 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %4, align 4
+  // CHECK-NEXT: %5 = load i32, i32* %E1.addr, align 4
+  // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat1, i32 %5)
+  // CHECK-NEXT: %6 = load float*, float** %Ptr2.addr, align 8
+  // CHECK-NEXT: %7 = bitcast float* %6 to [24 x float]*
+  // CHECK-NEXT: %8 = bitcast [24 x float]* %7 to <24 x float>*
+  // CHECK-NEXT: %9 = load <24 x float>, <24 x float>* %8, align 4
+  // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %Mat2, i32 0, i32 0
+  // CHECK-NEXT: %10 = bitcast [24 x float]* %value1 to <24 x float>*
+  // CHECK-NEXT: store <24 x float> %9, <24 x float>* %10, align 4
+  // CHECK-NEXT: %11 = load float, float* %E2.addr, align 4
+  // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat2, float %11)
+  // CHECK-NEXT: ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat, i32 %e)
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix*, align 8
+  // CHECK-NEXT: %e.addr = alloca i32, align 4
+  // CHECK-NEXT: store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT: store i32 %e, i32* %e.addr, align 4
+  // CHECK-NEXT: %0 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %0, i32 0, i32 0
+  // CHECK-NEXT: %1 = bitcast [4 x i32]* %value to <4 x i32>*
+  // CHECK-NEXT: %2 = load <4 x i32>, <4 x i32>* %1, align 4
+  // CHECK-NEXT: %3 = load i32, i32* %e.addr, align 4
+  // CHECK-NEXT: %4 = insertelement <4 x i32> %2, i32 %3, i32 1
+  // CHECK-NEXT: %5 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8
+  // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %5, i32 0, i32 0
+  // CHECK-NEXT: %6 = bitcast [4 x i32]* %value1 to <4 x i32>*
+  // CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %6, align 4
+  // CHECK-NEXT: ret void
+
+  // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat, float %e)
+  // CHECK-NEXT: entry:
+  // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix.0*, align 8
+  // CHECK-NEXT: %e.addr = alloca float, align 4
+  // CHECK-NEXT: store %struct.MyMatrix.0* %Mat, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT: store float %e, float* %e.addr, align 4
+  // CHECK-NEXT: %0 = load %struct.MyMatrix.0*, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %0, i32 0, i32 0
+  // CHECK-NEXT: %1 = bitcast [24 x float]* %value to <24 x float>*
+  // CHECK-NEXT: %2 = load <24 x float>, <24 x float>* %1, align 4
+  // CHECK-NEXT: %3 = load float, float* %e.addr, align 4
+  // CHECK-NEXT: %4 = insertelement <24 x float> %2, float %3, i32 1
+  // CHECK-NEXT: %5 = load %struct.MyMatrix.0*, %struct.MyMatrix.0** %Mat.addr, align 8
+  // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %5, i32 0, i32 0
+  // CHECK-NEXT: %6 = bitcast [24 x float]* %value1 to <24 x float>*
+  // CHECK-NEXT: store <24 x float> %4, <24 x float>* %6, align 4
+  // CHECK-NEXT: ret void
+
+  MyMatrix<unsigned, 2, 2> Mat1;
+  Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1);
+  insert(Mat1, E1);
+
+  MyMatrix<float, 3, 8> Mat2;
+  Mat2.value = *((decltype(Mat2)::matrix_t *)Ptr2);
+  insert(Mat2, E2);
+}
diff --git a/clang/test/SemaCXX/builtin-matrix.cpp b/clang/test/SemaCXX/builtin-matrix.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-matrix.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 %s -fenable-matrix -pedantic -std=c++11 -verify -triple=x86_64-apple-darwin9
+
+typedef float sx10x10_t __attribute__((matrix_type(10, 10)));
+sx10x10_t a;
+
+struct Foo {
+  char *s;
+};
+
+void insert(sx10x10_t *a, float f) {
+  *a = __builtin_matrix_insert(
+      10, // expected-error {{First argument must be a matrix}}
+      a,  // expected-error {{Row argument must be an unsigned integer}}
+      a,  // expected-error {{Column argument must be an unsigned integer}}
+      10);
+
+  int x = __builtin_matrix_insert(*a, 3u, 5u, 10.0); // expected-error {{cannot initialize a variable of type 'int' with an rvalue of type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10))) ')}}
+
+  // TODO: Should error here (index out of range).
+  *a = __builtin_matrix_insert(*a, -1u, 5u, 10.0);
+
+  // Only the row argument is invalid here; the column argument is fine.
+  *a = __builtin_matrix_insert(*a, f, // expected-error {{Row argument must be an unsigned integer}}
+                               5u, 10.0);
+}