diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -574,6 +574,7 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt") BUILTIN(__builtin_matrix_insert, "v.", "nt") +BUILTIN(__builtin_matrix_extract, "v.", "nt") // "Overloaded" Atomic operator builtins. These are overloaded to support data // types of i8, i16, i32, i64, and i128. The front-end sees calls to the diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11614,6 +11614,8 @@ // Matrix Builtin intrinsic handling. ExprResult SemaBuiltinMatrixInsertOverload(CallExpr *TheCall, ExprResult CallResult); + ExprResult SemaBuiltinExtractMatrixOverload(CallExpr *TheCall, + ExprResult CallResult); public: enum FormatStringType { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2356,6 +2356,15 @@ return RValue::get(MB.CreateMatrixInsert(MatValue, ValValue, RowValue, ColValue, MatrixTy->getNumRows())); } + case Builtin::BI__builtin_matrix_extract: { + MatrixBuilder MB(Builder); + Value *MatrixValue = EmitScalarExpr(E->getArg(0)); + Value *RowValue = EmitScalarExpr(E->getArg(1)); + Value *ColValue = EmitScalarExpr(E->getArg(2)); + const MatrixType *MatrixTy = getMatrixTy(E->getArg(0)->getType()); + Value *Result = MB.CreateExtractMatrix(MatrixValue, RowValue, ColValue, MatrixTy->getNumRows()); + return RValue::get(Result); + } case Builtin::BIfinite: case Builtin::BI__finite: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1614,6 +1614,7 @@ break; case Builtin::BI__builtin_matrix_insert: + case Builtin::BI__builtin_matrix_extract: if (!getLangOpts().EnableMatrix) { 
Diag(TheCall->getBeginLoc(), diag::err_builtin_matrix_disabled); return ExprError(); @@ -1622,6 +1623,8 @@ switch (BuiltinID) { case Builtin::BI__builtin_matrix_insert: return SemaBuiltinMatrixInsertOverload(TheCall, TheCallResult); + case Builtin::BI__builtin_matrix_extract: + return SemaBuiltinExtractMatrixOverload(TheCall, TheCallResult); default: llvm_unreachable("All matrix builtins should be handled here!"); } @@ -15176,3 +15179,102 @@ return CallResult; } + +/// \brief Use the call to create an overloaded matrix extraction declaration +/// +/// SemaBuiltinExtractMatrixOverload - Handle __builtin_matrix_extract. +/// This is declared to take (...), so everything must be checked. +/// A correct invocation will have 3 arguments passed in. +/// The return type is the element type of the matrix argument. +ExprResult Sema::SemaBuiltinExtractMatrixOverload(CallExpr *TheCall, + ExprResult CallResult) { + + // This function takes three parameters + // 1: matrix m - the matrix being extracted from + // 2: int row - row being extracted + // 3: int col - column being extracted + // + // returns the element at [row, column], which is of the same type as the + // matrix element + + // First part of this method focuses on creating the overloaded function type + // Second part of this method focuses on creating the declaration reference + // for the call + + // Check that the number of arguments is correct + if (checkArgCount(*this, TheCall, 3)) + return ExprError(); + + Expr *Callee = TheCall->getCallee(); + DeclRefExpr *DRE = cast<DeclRefExpr>(Callee->IgnoreParenCasts()); + FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); + + + // Some typechecking to ensure that the parameters are correct + Expr *MatArg = TheCall->getArg(0); + Expr *RowArg = TheCall->getArg(1); + Expr *ColArg = TheCall->getArg(2); + { + QualType MTy = MatArg->getType(); + QualType RTy = RowArg->getType(); + QualType CTy = ColArg->getType(); + + bool ArgError = false; + if (!MTy->isMatrixType()) { 
Diag(MatArg->getBeginLoc(), diag::err_builtin_matrix_arg) << 0; + ArgError = true; + } + if (!RTy->isIntegerType()) { + Diag(RowArg->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg) + << 0 << 0; + ArgError = true; + } + if (!CTy->isIntegerType()) { + Diag(ColArg->getBeginLoc(), diag::err_builtin_matrix_scalar_int_arg) + << 1 << 0; + ArgError = true; + } + if (ArgError) + return ExprError(); + } + + // Create new function prototype + + // Convert an l-valued matrix input to an r-value + if (!MatArg->isRValue()) { + ExprResult Res = + ImplicitCastExpr::Create(Context, MatArg->getType(), CK_LValueToRValue, + MatArg, nullptr, VK_RValue); + assert(!Res.isInvalid() && Res.get()->isRValue() && + "Failed to cast Matrix arg to an R-value"); + TheCall->setArg(0, Res.get()); + } + + MatrixType const *MTy = + cast<MatrixType>(MatArg->getType().getCanonicalType()); + QualType ReturnType = MTy->getElementType(); + + llvm::SmallVector ParameterTypes = { + MatArg->getType().getCanonicalType(), TheCall->getArg(1)->getType(), + TheCall->getArg(2)->getType()}; + + // Create a new DeclRefExpr to refer to the new decl. + DeclRefExpr *NewDRE = DeclRefExpr::Create( + Context, DRE->getQualifierLoc(), SourceLocation(), FDecl, + /*enclosing*/ false, DRE->getLocation(), Context.BuiltinFnTy, + DRE->getValueKind(), nullptr, nullptr, DRE->isNonOdrUse()); + + // Set the callee in the CallExpr. + // FIXME: This loses syntactic information. + QualType CalleePtrTy = Context.getPointerType(FDecl->getType()); + ExprResult PromotedCall = ImpCastExprToType(NewDRE, CalleePtrTy, + CK_BuiltinFnToFnPtr); + TheCall->setCallee(PromotedCall.get()); + + // Change the result type of the call to the matrix element type. This + // is arbitrary, but the codegen for these builtins is designed to handle it + // gracefully. 
+ TheCall->setType(ReturnType); + + return CallResult; +} diff --git a/clang/test/CodeGen/builtin-matrix.c b/clang/test/CodeGen/builtin-matrix.c --- a/clang/test/CodeGen/builtin-matrix.c +++ b/clang/test/CodeGen/builtin-matrix.c @@ -124,3 +124,34 @@ a = __builtin_matrix_insert(a, 2u, 1u, i); b = __builtin_matrix_insert(b, 1u, 2u, e); } + +void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) { + double v1 = __builtin_matrix_extract(a, 2, 3); + float v2 = __builtin_matrix_extract(b, 2, 1); + int v3 = __builtin_matrix_extract(c, 1, 1); + + // CHECK-LABEL: @extract1( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %c.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %v1 = alloca double, align 8 + // CHECK-NEXT: %v2 = alloca float, align 4 + // CHECK-NEXT: %v3 = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + // CHECK-NEXT: %2 = bitcast [27 x i32]* %c.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %c, <27 x i32>* %2, align 4 + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %4 = extractelement <25 x double> %3, i32 17 + // CHECK-NEXT: store double %4, double* %v1, align 8 + // CHECK-NEXT: %5 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %6 = extractelement <9 x float> %5, i32 5 + // CHECK-NEXT: store float %6, float* %v2, align 4 + // CHECK-NEXT: %7 = load <27 x i32>, <27 x i32>* %2, align 4 + // CHECK-NEXT: %8 = extractelement <27 x i32> %7, i32 10 + // CHECK-NEXT: store i32 %8, i32* %v3, align 4 + // CHECK-NEXT: ret void +} diff --git a/clang/test/CodeGenCXX/builtin-matrix.cpp b/clang/test/CodeGenCXX/builtin-matrix.cpp --- a/clang/test/CodeGenCXX/builtin-matrix.cpp +++ 
b/clang/test/CodeGenCXX/builtin-matrix.cpp @@ -70,9 +70,9 @@ Mat.value = __builtin_matrix_insert(Mat.value, 1u, 0u, e); } -void test_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) { +void test_insert_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) { - // CHECK-LABEL: define void @_Z13test_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2) + // CHECK-LABEL: define void @_Z20test_insert_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2) // CHECK-NEXT: entry: // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8 // CHECK-NEXT: %E1.addr = alloca i32, align 4 @@ -148,3 +148,78 @@ Mat2.value = *((decltype(Mat2)::matrix_t *)Ptr2); insert(Mat2, E2); } + + +typedef float fx3x3_t __attribute__((matrix_type(3, 3))); +void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) { + // CHECK-LABEL: @_Z8extract1Dm5_5_dDm3_3_fDm9_3_i( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %c.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %v1 = alloca double, align 8 + // CHECK-NEXT: %v2 = alloca float, align 4 + // CHECK-NEXT: %v3 = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + // CHECK-NEXT: %2 = bitcast [27 x i32]* %c.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %c, <27 x i32>* %2, align 4 + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %4 = extractelement <25 x double> %3, i32 17 + // CHECK-NEXT: store double %4, double* %v1, align 8 + // CHECK-NEXT: %5 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %6 = extractelement <9 x float> %5, i32 5 + // CHECK-NEXT: store float %6, float* %v2, align 4 + // CHECK-NEXT: %7 = load <27 x i32>, <27 x i32>* %2, 
align 4 + // CHECK-NEXT: %8 = extractelement <27 x i32> %7, i32 10 + // CHECK-NEXT: store i32 %8, i32* %v3, align 4 + // CHECK-NEXT: ret void + + double v1 = __builtin_matrix_extract(a, 2, 3); + float v2 = __builtin_matrix_extract(b, 2, 1); + int v3 = __builtin_matrix_extract(c, 1, 1); +} + +template +EltTy extract(MyMatrix &Mat) { + return __builtin_matrix_extract(Mat.value, 1u, 0u); +} + +void test_extract_template(unsigned *Ptr1, float *Ptr2) { + + // CHECK-LABEL: define void @_Z21test_extract_templatePjPf(i32* %Ptr1, float* %Ptr2) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8 + // CHECK-NEXT: %Ptr2.addr = alloca float*, align 8 + // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix, align 4 + // CHECK-NEXT: %v1 = alloca i32, align 4 + // CHECK-NEXT: store i32* %Ptr1, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: store float* %Ptr2, float** %Ptr2.addr, align 8 + // CHECK-NEXT: %0 = load i32*, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: %1 = bitcast i32* %0 to [4 x i32]* + // CHECK-NEXT: %2 = bitcast [4 x i32]* %1 to <4 x i32>* + // CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 4 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0 + // CHECK-NEXT: %4 = bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %4, align 4 + // CHECK-NEXT: %call = call i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat1) + // CHECK-NEXT: store i32 %call, i32* %v1, align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix*, align 8 + // CHECK-NEXT: store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: %0 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: %value = getelementptr 
inbounds %struct.MyMatrix, %struct.MyMatrix* %0, i32 0, i32 0 + // CHECK-NEXT: %1 = bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: %2 = load <4 x i32>, <4 x i32>* %1, align 4 + // CHECK-NEXT: %3 = extractelement <4 x i32> %2, i32 1 + // CHECK-NEXT: ret i32 %3 + + MyMatrix Mat1; + Mat1.value = *((decltype(Mat1)::matrix_t*) Ptr1); + unsigned v1 = extract(Mat1); +} diff --git a/clang/test/Sema/builtin-matrix.c b/clang/test/Sema/builtin-matrix.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/builtin-matrix.c @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 %s -fenable-matrix -pedantic -verify -triple=x86_64-apple-darwin9 + +typedef float sx10x10_t __attribute__((matrix_type(10, 10))); +sx10x10_t a; + +struct Foo { + char *s; +}; + +void insert(sx10x10_t *a, float f) { + *a = __builtin_matrix_insert( + 10, // expected-error {{First argument must be a matrix}} + a, // expected-error {{Row argument must be an unsigned integer}} + a, // expected-error {{Column argument must be an unsigned integer}} + 10); + + int x = __builtin_matrix_insert(*a, 3u, 5u, 10.0); // expected-error {{initializing 'int' with an expression of incompatible type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10))) ')}} + + // TODO: Should error here (index out of range). + *a = __builtin_matrix_insert(*a, -1u, 5u, 10.0); + + // FIXME: Column argument is fine! 
+ *a = __builtin_matrix_insert(*a, f, // expected-error {{Row argument must be an unsigned integer}} + 5u, 10.0); // expected-error {{Column argument must be an unsigned integer}} +} + + +void extract(sx10x10_t *a) { + struct Foo v1 = __builtin_matrix_extract( // expected-error {{initializing 'struct Foo' with an expression of incompatible type 'float'}} + *a, 1, 1); + + float v2 = __builtin_matrix_extract( + 10, // expected-error {{First argument must be a matrix}} + a, // expected-error {{Row argument must be an unsigned integer}} + a); // expected-error {{Column argument must be an unsigned integer}} + + float v3 = __builtin_matrix_extract( + *a, 1); // expected-error {{too few arguments to function call, expected 3, have 2}} + + float v4 = __builtin_matrix_extract( + *a, 1, 1, 1); // expected-error {{too many arguments to function call, expected 3, have 4}} +} diff --git a/clang/test/SemaCXX/builtin-matrix.cpp b/clang/test/SemaCXX/builtin-matrix.cpp --- a/clang/test/SemaCXX/builtin-matrix.cpp +++ b/clang/test/SemaCXX/builtin-matrix.cpp @@ -7,6 +7,13 @@ char *s; }; +template +struct MyMatrix { + using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); + + matrix_t value; +}; + void insert(sx10x10_t *a, float f) { *a = __builtin_matrix_insert( 10, // expected-error {{First argument must be a matrix}} @@ -23,3 +30,15 @@ *a = __builtin_matrix_insert(*a, f, // expected-error {{Row argument must be an unsigned integer}} 5u, 10.0); // expected-error {{Column argument must be an unsigned integer}} } + +template +EltTy extract(MyMatrix &Mat) { + char *v1 = __builtin_matrix_extract(Mat.value, 1u, 0u); // expected-error {{cannot initialize a variable of type 'char *' with an rvalue of type 'unsigned int'}} + return __builtin_matrix_extract(Mat.value, 1u, 0u); +} + +void test_extract_template(unsigned *Ptr1, float *Ptr2) { + MyMatrix Mat1; + Mat1.value = *((decltype(Mat1)::matrix_t*) Ptr1); + unsigned v1 = extract(Mat1); // expected-note {{in instantiation of 
function template specialization 'extract' requested here}} +}