diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -978,6 +978,7 @@
 #include "clang/Basic/OpenCLImageTypes.def"
   CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy;
   CanQualType OCLQueueTy, OCLReserveIDTy;
+  CanQualType IncompleteMatrixIdxTy;
   CanQualType OMPArraySectionTy, OMPArrayShapingTy, OMPIteratorTy;
 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
   CanQualType Id##Ty;
diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def
--- a/clang/include/clang/AST/BuiltinTypes.def
+++ b/clang/include/clang/AST/BuiltinTypes.def
@@ -310,6 +310,9 @@
 // context.
 PLACEHOLDER_TYPE(ARCUnbridgedCast, ARCUnbridgedCastTy)
 
+// A placeholder type for incomplete matrix index expressions.
+PLACEHOLDER_TYPE(IncompleteMatrixIdx, IncompleteMatrixIdxTy)
+
 // A placeholder type for OpenMP array sections.
 PLACEHOLDER_TYPE(OMPArraySection, OMPArraySectionTy)
 
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -461,6 +461,13 @@
     return const_cast<Expr*>(this)->getReferencedDeclOfCallee();
   }
 
+  /// If this expression is part of a matrix index expression, return the
+  /// accessed matrix type. An expression is part of a matrix index expression
+  /// if it either has matrix type (= base of a row index expression) or is a
+  /// row index expression, i.e. a subscript whose base has matrix type.
+  /// Returns nullptr otherwise, or if \p EnableMatrix is false.
+  const ConstantMatrixType *getMatrixFromIndexExpr(bool EnableMatrix) const;
+
   /// If this expression is an l-value for an Objective C
   /// property, find the underlying property reference expression.
   const ObjCPropertyRefExpr *getObjCProperty() const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10744,6 +10744,12 @@
 
 def err_builtin_matrix_disabled: Error<
   "matrix types extension is disabled. Pass -fenable-matrix to enable it">;
+def err_matrix_index_not_integer: Error<
+  "matrix %select{row|column}0 index is not an integer">;
+def err_matrix_index_outside_range: Error<
+  "matrix %select{row|column}0 index is outside the allowed range [0, %1)">;
+def err_matrix_incomplete_index: Error<
+  "single subscript expressions are not allowed for matrix values">;
 
 def err_preserve_field_info_not_field : Error<
   "__builtin_preserve_field_info argument %0 not a field access">;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1057,6 +1057,9 @@
   /// The placeholder type for OpenMP iterator expression.
   PREDEF_TYPE_OMP_ITERATOR = 71,
 
+  /// A placeholder type for incomplete matrix index operations.
+  PREDEF_TYPE_INCOMPLETE_MATRIX_IDX = 72,
+
   /// OpenCL image types with auto numeration
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
   PREDEF_TYPE_##Id##_ID,
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1388,6 +1388,8 @@
     InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
     InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator);
   }
+  if (LangOpts.MatrixTypes)
+    InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
 
   // C99 6.2.5p11.
FloatComplexTy = getComplexType(FloatTy);
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -3848,6 +3848,21 @@
   return nullptr;
 }
 
+const ConstantMatrixType *
+Expr::getMatrixFromIndexExpr(bool EnableMatrix) const {
+  if (!EnableMatrix)
+    return nullptr;
+
+  // A matrix-typed expression is the base of a row index expression.
+  if (const auto *MTy = getType()->getAs<ConstantMatrixType>())
+    return MTy;
+
+  // A row index expression is a subscript whose base has matrix type.
+  if (const auto *SubscriptE = dyn_cast<ArraySubscriptExpr>(this))
+    return SubscriptE->getBase()->getType()->getAs<ConstantMatrixType>();
+  return nullptr;
+}
+
 bool Expr::refersToVectorElement() const {
   // FIXME: Why do we not just look at the ObjectKind here?
   const Expr *E = this->IgnoreParens();
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7763,6 +7763,10 @@
   if (E->getBase()->getType()->isVectorType())
     return Error(E);
 
+  // Matrix subscript bases are not handled here; fail with a diagnostic.
+  if (E->getBase()->getMatrixFromIndexExpr(Info.getLangOpts().MatrixTypes))
+    return Error(E);
+
   bool Success = true;
   if (!evaluatePointer(E->getBase(), Result)) {
     if (!Info.noteFailure())
diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp
--- a/clang/lib/AST/NSAPI.cpp
+++ b/clang/lib/AST/NSAPI.cpp
@@ -482,6 +482,7 @@
   case BuiltinType::Half:
   case BuiltinType::PseudoObject:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
   case BuiltinType::OMPIterator:
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3025,6 +3025,8 @@
     return "queue_t";
   case OCLReserveID:
     return "reserve_id_t";
+  case IncompleteMatrixIdx:
+    return "<incomplete matrix index type>";
   case OMPArraySection:
     return "<OpenMP array section type>";
   case OMPArrayShaping:
@@ -4045,6 +4047,7 @@
 #include "clang/Basic/AArch64SVEACLETypes.def"
     case BuiltinType::BuiltinFn:
     case BuiltinType::NullPtr:
+    case BuiltinType::IncompleteMatrixIdx:
     case BuiltinType::OMPArraySection:
     case BuiltinType::OMPArrayShaping:
     case BuiltinType::OMPIterator:
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -403,6 +403,7 @@
   case BuiltinType::Id:
 #include "clang/Basic/AArch64SVEACLETypes.def"
     case BuiltinType::BuiltinFn:
+    case BuiltinType::IncompleteMatrixIdx:
     case BuiltinType::OMPArraySection:
     case BuiltinType::OMPArrayShaping:
     case BuiltinType::OMPIterator:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1846,6 +1846,18 @@
   return RValue::get(CGF.EmitLoadOfScalar(LV, Loc));
 }
 
+/// Load a single element from a matrix element l-value: load the whole matrix
+/// as a flat vector and extract the element at the l-value's index.
+static RValue EmitLoadOfMatrixEltLValue(LValue LV, SourceLocation Loc,
+                                        CodeGenFunction &CGF) {
+  assert(LV.getType()->isConstantMatrixType() &&
+         "matrix element LValues need to access a matrix");
+  Address Addr = MaybeConvertMatrixAddress(LV.getVectorAddress(), CGF);
+  llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, LV.isVolatileQualified());
+  return RValue::get(
+      CGF.Builder.CreateExtractElement(Load, LV.getVectorIdx(), "matext"));
+}
+
 /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
 /// method emits the address of the lvalue, then loads the result as an rvalue,
 /// returning the rvalue.
@@ -1878,6 +1890,9 @@
     return RValue::get(EmitLoadOfScalar(LV, Loc));
   }
 
+  if (LV.isMatrixElt())
+    return EmitLoadOfMatrixEltLValue(LV, Loc, *this);
+
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
@@ -1999,6 +2014,19 @@
   return RValue::get(Call);
 }
 
+/// Store the scalar \p Src into the matrix element \p Dst: load the whole
+/// matrix as a flat vector, insert the element and store the vector back.
+static void EmitStoreThroughMatrixEltLValue(RValue Src, LValue Dst,
+                                            CodeGenFunction &CGF) {
+  assert(Dst.getType()->isConstantMatrixType() &&
+         "matrix element LValues need to access a matrix");
+  Address DstAddr = MaybeConvertMatrixAddress(Dst.getVectorAddress(), CGF);
+  llvm::Value *Vec = CGF.Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
+  Vec = CGF.Builder.CreateInsertElement(Vec, Src.getScalarVal(),
+                                        Dst.getVectorIdx(), "matins");
+
+  CGF.Builder.CreateStore(Vec, DstAddr, Dst.isVolatileQualified());
+}
 
 /// EmitStoreThroughLValue - Store the specified rvalue into the specified
 /// lvalue, where both are guaranteed to the have the same type, and that type
@@ -2025,6 +2053,9 @@
   if (Dst.isGlobalReg())
     return EmitStoreThroughGlobalRegLValue(Src, Dst);
 
+  if (Dst.isMatrixElt())
+    return EmitStoreThroughMatrixEltLValue(Src, Dst, *this);
+
   assert(Dst.isBitField() && "Unknown LValue type");
   return EmitStoreThroughBitfieldLValue(Src, Dst);
 }
@@ -3637,6 +3668,28 @@
                                  TBAAAccessInfo());
   }
 
+  // If the base is part of a matrix index expression, E is the column index
+  // and its base is the row index expression. Form a matrix element lvalue
+  // with an index into the matrix as a flattened (column-major) vector.
+  if (auto *MTy =
+          E->getBase()->getMatrixFromIndexExpr(getLangOpts().MatrixTypes)) {
+    auto *RowIdxExpr = cast<ArraySubscriptExpr>(E->getBase());
+    LValue Base = EmitLValue(RowIdxExpr->getBase());
+    llvm::Value *RowIdx = EmitScalarExpr(RowIdxExpr->getIdx());
+    llvm::Value *ColIdx = EmitScalarExpr(E->getIdx());
+    unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
+                                 ColIdx->getType()->getScalarSizeInBits());
+    llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), MaxWidth);
+    RowIdx = Builder.CreateZExt(RowIdx, IntTy);
+    ColIdx = Builder.CreateZExt(ColIdx, IntTy);
+    llvm::Value *NumRows = Builder.getIntN(MaxWidth, MTy->getNumRows());
+    llvm::Value *FinalIdx =
+        Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+    return LValue::MakeMatrixElt(Base.getAddress(*this), FinalIdx,
+                                 RowIdxExpr->getBase()->getType(),
+                                 Base.getBaseInfo(), TBAAAccessInfo());
+  }
+
   // All the other cases basically behave like simple offsetting.
 
   // Handle the extvector case we ignored above.
diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h
--- a/clang/lib/CodeGen/CGValue.h
+++ b/clang/lib/CodeGen/CGValue.h
@@ -170,7 +170,8 @@
     VectorElt,    // This is a vector element l-value (V[i]), use getVector*
     BitField,     // This is a bitfield l-value, use getBitfield*.
     ExtVectorElt, // This is an extended vector subset, use getExtVectorComp
-    GlobalReg     // This is a register l-value, use getGlobalReg()
+    GlobalReg,    // This is a register l-value, use getGlobalReg()
+    MatrixElt     // This is a matrix element, use getVector*
   } LVType;
 
   llvm::Value *V;
@@ -254,6 +255,7 @@
   bool isBitField() const { return LVType == BitField; }
   bool isExtVectorElt() const { return LVType == ExtVectorElt; }
   bool isGlobalReg() const { return LVType == GlobalReg; }
+  bool isMatrixElt() const { return LVType == MatrixElt; }
 
   bool isVolatileQualified() const { return Quals.hasVolatile(); }
   bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -337,8 +339,14 @@
   Address getVectorAddress() const {
     return Address(getVectorPointer(), getAlignment());
   }
-  llvm::Value *getVectorPointer() const { assert(isVectorElt()); return V; }
-  llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; }
+  llvm::Value *getVectorPointer() const {
+    assert(isVectorElt() || isMatrixElt());
+    return V;
+  }
+  llvm::Value *getVectorIdx() const {
+    assert(isVectorElt() || isMatrixElt());
+    return VectorIdx;
+  }
 
   // extended vector elements.
   Address getExtVectorAddress() const {
@@ -430,6 +438,20 @@
     return R;
   }
 
+  /// Create an l-value for a single element of the matrix stored at
+  /// \p matAddress; \p Idx is the element's index into the flattened matrix.
+  static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx,
+                              QualType type, LValueBaseInfo BaseInfo,
+                              TBAAAccessInfo TBAAInfo) {
+    LValue R;
+    R.LVType = MatrixElt;
+    R.V = matAddress.getPointer();
+    R.VectorIdx = Idx;
+    R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(),
+                 BaseInfo, TBAAInfo);
+    return R;
+  }
+
   RValue asAggregateRValue(CodeGenFunction &CGF) const {
     return RValue::getAggregate(getAddress(CGF), isVolatileQualified());
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4561,7 +4561,9 @@
   // resolution for the operator overload should get the first crack
   // at the overload.
bool IsMSPropertySubscript = false;
-  if (base->getType()->isNonOverloadPlaceholderType()) {
+  QualType BaseTy = base->getType();
+  if (BaseTy->isNonOverloadPlaceholderType() &&
+      !BaseTy->isSpecificPlaceholderType(BuiltinType::IncompleteMatrixIdx)) {
     IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
     if (!IsMSPropertySubscript) {
       ExprResult result = CheckPlaceholderExpr(base);
@@ -5195,7 +5197,8 @@
   }
 
   // Perform default conversions.
-  if (!LHSExp->getType()->getAs<VectorType>()) {
+  if (!LHSExp->getType()->getAs<VectorType>() &&
+      !LHSExp->getMatrixFromIndexExpr(getLangOpts().MatrixTypes)) {
     ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp);
     if (Result.isInvalid())
       return ExprError();
@@ -5286,6 +5289,42 @@
     BaseExpr = LHSExp;
     IndexExpr = RHSExp;
     ResultType = LHSTy->getAs<PointerType>()->getPointeeType();
+  } else if (const auto *MTy =
+                 LHSExp->getMatrixFromIndexExpr(getLangOpts().MatrixTypes)) {
+    BaseExpr = LHSExp;
+    IndexExpr = RHSExp;
+
+    // Validate index.
+    bool IsRowIdx = BaseExpr->getType()->isConstantMatrixType();
+    if (!IndexExpr->getType()->isIntegerType() &&
+        !IndexExpr->isTypeDependent()) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_not_integer)
+          << (IsRowIdx ? 0 : 1);
+      return ExprError();
+    }
+
+    unsigned Dim = IsRowIdx ? MTy->getNumRows() : MTy->getNumColumns();
+    llvm::APSInt Idx;
+    if (!IndexExpr->isValueDependent() &&
+        IndexExpr->isIntegerConstantExpr(Idx, Context) &&
+        (Idx < 0 || Idx >= Dim)) {
+      Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_outside_range)
+          << (IsRowIdx ? 0 : 1) << Dim;
+      return ExprError();
+    }
+
+    // A row index expression gets the IncompleteMatrixIdx placeholder type, so
+    // using a single subscript on a matrix is diagnosed. Once the column index
+    // is added, both subscript expressions get the matrix element type.
+    if (IsRowIdx)
+      ResultType = Context.IncompleteMatrixIdxTy;
+    else {
+      ResultType = MTy->getElementType();
+      BaseExpr->setType(ResultType);
+    }
+
+    VK = VK_LValue;
+    OK = OK_VectorComponent;
   } else if (RHSTy->isArrayType()) {
     // Same as previous, except for 123[f().a] case
     Diag(RHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue)
@@ -5935,6 +5974,7 @@
   // These are always invalid as call arguments and should be reported.
   case BuiltinType::BoundMember:
   case BuiltinType::BuiltinFn:
+  case BuiltinType::IncompleteMatrixIdx:
   case BuiltinType::OMPArraySection:
   case BuiltinType::OMPArrayShaping:
   case BuiltinType::OMPIterator:
@@ -18864,6 +18904,11 @@
     return ExprError();
   }
 
+  case BuiltinType::IncompleteMatrixIdx:
+    Diag(cast<ArraySubscriptExpr>(E->IgnoreParens())->getIdx()->getBeginLoc(),
+         diag::err_matrix_incomplete_index);
+    return ExprError();
+
   // Expressions of unknown type.
   case BuiltinType::OMPArraySection:
     Diag(E->getBeginLoc(), diag::err_omp_array_section_use);
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -240,6 +240,9 @@
   case BuiltinType::BuiltinFn:
     ID = PREDEF_TYPE_BUILTIN_FN;
     break;
+  case BuiltinType::IncompleteMatrixIdx:
+    ID = PREDEF_TYPE_INCOMPLETE_MATRIX_IDX;
+    break;
   case BuiltinType::OMPArraySection:
     ID = PREDEF_TYPE_OMP_ARRAY_SECTION;
     break;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7007,6 +7007,9 @@
     case PREDEF_TYPE_BUILTIN_FN:
       T = Context.BuiltinFnTy;
       break;
+    case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX:
+      T = Context.IncompleteMatrixIdxTy;
+      break;
     case PREDEF_TYPE_OMP_ARRAY_SECTION:
       T = Context.OMPArraySectionTy;
       break;
diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/matrix-type-operators.c
@@ -0,0
+1,304 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// Tests for the matrix type operators. + +typedef double dx5x5_t __attribute__((matrix_type(5, 5))); +typedef float fx2x3_t __attribute__((matrix_type(2, 3))); + +// Check that we can use matrix index expression on different floating point +// matrixes and indices. +void insert_fp(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) { + // CHECK-LABEL: define void @insert_fp(<25 x double> %a, double %d, <6 x float> %b, float %e, i32 %j, i32 %k) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %d.addr = alloca double, align 8 + // CHECK-NEXT: %b.addr = alloca [6 x float], align 4 + // CHECK-NEXT: %e.addr = alloca float, align 4 + // CHECK-NEXT: %j.addr = alloca i32, align 4 + // CHECK-NEXT: %k.addr = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: store double %d, double* %d.addr, align 8 + // CHECK-NEXT: %1 = bitcast [6 x float]* %b.addr to <6 x float>* + // CHECK-NEXT: store <6 x float> %b, <6 x float>* %1, align 4 + // CHECK-NEXT: store float %e, float* %e.addr, align 4 + // CHECK-NEXT: store i32 %j, i32* %j.addr, align 4 + // CHECK-NEXT: store i32 %k, i32* %k.addr, align 4 + // CHECK-NEXT: %2 = load double, double* %d.addr, align 8 + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %matins = insertelement <25 x double> %3, double %2, i64 5 + // CHECK-NEXT: store <25 x double> %matins, <25 x double>* %0, align 8 + a[0ll][1u] = d; + + // CHECK-NEXT: %4 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %5 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %matins1 = insertelement <6 x float> %5, float %4, i32 1 + // CHECK-NEXT: store <6 x float> %matins1, <6 x float>* %1, align 4 + b[1][0] = e; + + // 
CHECK-NEXT: %6 = load double, double* %d.addr, align 8 + // CHECK-NEXT: %7 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %matins2 = insertelement <25 x double> %7, double %6, i32 1 + // CHECK-NEXT: store <25 x double> %matins2, <25 x double>* %0, align 8 + a[1][0u] = d; + + // CHECK-NEXT: %8 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %9 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %matins3 = insertelement <6 x float> %9, float %8, i64 3 + // CHECK-NEXT: store <6 x float> %matins3, <6 x float>* %1, align 4 + b[1ull][1] = e; + + // CHECK-NEXT: %10 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %11 = load i32, i32* %j.addr, align 4 + // CHECK-NEXT: %12 = load i32, i32* %k.addr, align 4 + // CHECK-NEXT: %13 = mul i32 %12, 2 + // CHECK-NEXT: %14 = add i32 %13, %11 + // CHECK-NEXT: %15 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %matins4 = insertelement <6 x float> %15, float %10, i32 %14 + // CHECK-NEXT: store <6 x float> %matins4, <6 x float>* %1, align 4 + // CHECK-NEXT: ret void + b[j][k] = e; +} + +// Check that we can can use matrix index expressions on integer matrixes. 
+typedef int ix9x3_t __attribute__((matrix_type(9, 3))); +void insert_int(ix9x3_t a, int i) { + // CHECK-LABEL: define void @insert_int(<27 x i32> %a, i32 %i) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %i.addr = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [27 x i32]* %a.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %a, <27 x i32>* %0, align 4 + // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4 + // CHECK-NEXT: %1 = load i32, i32* %i.addr, align 4 + // CHECK-NEXT: %2 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %matins = insertelement <27 x i32> %2, i32 %1, i32 13 + // CHECK-NEXT: store <27 x i32> %matins, <27 x i32>* %0, align 4 + // CHECK-NEXT: ret void + + a[4u][1u] = i; +} + +// Check that we can can use matrix index expressions on FP and integer +// matrixes. +typedef int ix9x3_t __attribute__((matrix_type(9, 3))); +void insert_int_fp(ix9x3_t *a, int i, fx2x3_t b, float e, short j, unsigned long long k) { + // CHECK-LABEL: define void @insert_int_fp([27 x i32]* %a, i32 %i, <6 x float> %b, float %e, i16 signext %j, i64 %k) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [27 x i32]*, align 8 + // CHECK-NEXT: %i.addr = alloca i32, align 4 + // CHECK-NEXT: %b.addr = alloca [6 x float], align 4 + // CHECK-NEXT: %e.addr = alloca float, align 4 + // CHECK-NEXT: %j.addr = alloca i16, align 2 + // CHECK-NEXT: %k.addr = alloca i64, align 8 + // CHECK-NEXT: store [27 x i32]* %a, [27 x i32]** %a.addr, align 8 + // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4 + // CHECK-NEXT: %0 = bitcast [6 x float]* %b.addr to <6 x float>* + // CHECK-NEXT: store <6 x float> %b, <6 x float>* %0, align 4 + // CHECK-NEXT: store float %e, float* %e.addr, align 4 + // CHECK-NEXT: store i16 %j, i16* %j.addr, align 2 + // CHECK-NEXT: store i64 %k, i64* %k.addr, align 8 + // CHECK-NEXT: %1 = load i32, i32* %i.addr, align 4 + // CHECK-NEXT: %2 = load [27 x i32]*, [27 x i32]** %a.addr, align 8 + // 
CHECK-NEXT: %3 = bitcast [27 x i32]* %2 to <27 x i32>* + // CHECK-NEXT: %4 = load <27 x i32>, <27 x i32>* %3, align 4 + // CHECK-NEXT: %matins = insertelement <27 x i32> %4, i32 %1, i32 13 + // CHECK-NEXT: store <27 x i32> %matins, <27 x i32>* %3, align 4 + (*a)[4u][1u] = i; + + // CHECK-NEXT: %5 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %6 = load <6 x float>, <6 x float>* %0, align 4 + // CHECK-NEXT: %matins1 = insertelement <6 x float> %6, float %5, i32 3 + // CHECK-NEXT: store <6 x float> %matins1, <6 x float>* %0, align 4 + b[1u][1u] = e; + + // CHECK-NEXT: %7 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %8 = load i16, i16* %j.addr, align 2 + // CHECK-NEXT: %9 = load i64, i64* %k.addr, align 8 + // CHECK-NEXT: %10 = zext i16 %8 to i64 + // CHECK-NEXT: %11 = mul i64 %9, 2 + // CHECK-NEXT: %12 = add i64 %11, %10 + // CHECK-NEXT: %13 = load <6 x float>, <6 x float>* %0, align 4 + // CHECK-NEXT: %matins2 = insertelement <6 x float> %13, float %7, i64 %12 + // CHECK-NEXT: store <6 x float> %matins2, <6 x float>* %0, align 4 + // CHECK-NEXT: ret void + b[j][k] = e; +} + +// Check that we can use overloaded matrix index expressions on matrixes with +// matching dimensions, but different element types. 
+typedef double dx3x3_t __attribute__((matrix_type(3, 3))); +typedef float fx3x3_t __attribute__((matrix_type(3, 3))); +void insert_matching_dimensions(dx3x3_t a, double i, fx3x3_t b, float e, long int j, char k) { + // CHECK-LABEL: define void @insert_matching_dimensions(<9 x double> %a, double %i, <9 x float> %b, float %e, i64 %j, i8 signext %k) #3 { + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [9 x double], align 8 + // CHECK-NEXT: %i.addr = alloca double, align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %e.addr = alloca float, align 4 + // CHECK-NEXT: %j.addr = alloca i64, align 8 + // CHECK-NEXT: %k.addr = alloca i8, align 1 + // CHECK-NEXT: %0 = bitcast [9 x double]* %a.addr to <9 x double>* + // CHECK-NEXT: store <9 x double> %a, <9 x double>* %0, align 8 + // CHECK-NEXT: store double %i, double* %i.addr, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + // CHECK-NEXT: store float %e, float* %e.addr, align 4 + // CHECK-NEXT: store i64 %j, i64* %j.addr, align 8 + // CHECK-NEXT: store i8 %k, i8* %k.addr, align 1 + // CHECK-NEXT: %2 = load double, double* %i.addr, align 8 + // CHECK-NEXT: %3 = load <9 x double>, <9 x double>* %0, align 8 + // CHECK-NEXT: %matins = insertelement <9 x double> %3, double %2, i32 5 + // CHECK-NEXT: store <9 x double> %matins, <9 x double>* %0, align 8 + a[2u][1u] = i; + + // CHECK-NEXT: %4 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %5 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matins1 = insertelement <9 x float> %5, float %4, i32 7 + // CHECK-NEXT: store <9 x float> %matins1, <9 x float>* %1, align 4 + b[1u][2u] = e; + + // CHECK-NEXT: %6 = load double, double* %i.addr, align 8 + // CHECK-NEXT: %conv = fptrunc double %6 to float + // CHECK-NEXT: %7 = load i64, i64* %j.addr, align 8 + // CHECK-NEXT: %8 = load i8, i8* %k.addr, align 1 + // CHECK-NEXT: %9 = zext i8 %8 to 
i64 + // CHECK-NEXT: %10 = mul i64 %9, 3 + // CHECK-NEXT: %11 = add i64 %10, %7 + // CHECK-NEXT: %12 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matins2 = insertelement <9 x float> %12, float %conv, i64 %11 + // CHECK-NEXT: store <9 x float> %matins2, <9 x float>* %1, align 4 + // CHECK-NEXT: ret void + b[j][k] = i; +} + +void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c, unsigned long j) { + // CHECK-LABEL: @extract1( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %c.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %j.addr = alloca i64, align 8 + // CHECK-NEXT: %v1 = alloca double, align 8 + // CHECK-NEXT: %v2 = alloca float, align 4 + // CHECK-NEXT: %v3 = alloca i32, align 4 + // CHECK-NEXT: %v4 = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + double v1 = a[2][3]; + + // CHECK-NEXT: %2 = bitcast [27 x i32]* %c.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %c, <27 x i32>* %2, align 4 + // CHECK-NEXT: store i64 %j, i64* %j.addr, align 8 + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %matext = extractelement <25 x double> %3, i32 17 + // CHECK-NEXT: store double %matext, double* %v1, align 8 + float v2 = b[2][1]; + + // CHECK-NEXT: %4 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matext1 = extractelement <9 x float> %4, i32 5 + // CHECK-NEXT: store float %matext1, float* %v2, align 4 + // CHECK-NEXT: %5 = load <27 x i32>, <27 x i32>* %2, align 4 + // CHECK-NEXT: %matext2 = extractelement <27 x i32> %5, i32 10 + // CHECK-NEXT: store i32 %matext2, i32* %v3, align 4 + int v3 = c[1][1]; + + // CHECK-NEXT: %6 = load i64, i64* %j.addr, align 8 + // 
CHECK-NEXT: %7 = load i64, i64* %j.addr, align 8 + // CHECK-NEXT: %8 = mul i64 %7, 9 + // CHECK-NEXT: %9 = add i64 %8, %6 + // CHECK-NEXT: %10 = load <27 x i32>, <27 x i32>* %2, align 4 + // CHECK-NEXT: %matext3 = extractelement <27 x i32> %10, i64 %9 + // CHECK-NEXT: store i32 %matext3, i32* %v4, align 4 + // CHECK-NEXT: ret void + int v4 = c[j][j]; +} + +typedef double dx3x2_t __attribute__((matrix_type(3, 2))); +double test_extract_matrix_pointer(dx5x5_t *ptr, dx3x2_t **ptr2) { + // CHECK-LABEL: define double @test_extract_matrix_pointer([25 x double]* %ptr, [6 x double]** %ptr2) + // CHECK-NEXT: entry: + // CHECK-NEXT: %ptr.addr = alloca [25 x double]*, align 8 + // CHECK-NEXT: %ptr2.addr = alloca [6 x double]**, align 8 + // CHECK-NEXT: store [25 x double]* %ptr, [25 x double]** %ptr.addr, align 8 + // CHECK-NEXT: store [6 x double]** %ptr2, [6 x double]*** %ptr2.addr, align 8 + // CHECK-NEXT: %0 = load [25 x double]*, [25 x double]** %ptr.addr, align 8 + // CHECK-NEXT: %arrayidx = getelementptr inbounds [25 x double], [25 x double]* %0, i64 0 + // CHECK-NEXT: %1 = bitcast [25 x double]* %arrayidx to <25 x double>* + // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %1, align 8 + // CHECK-NEXT: %matext = extractelement <25 x double> %2, i32 17 + // CHECK-NEXT: %3 = load [6 x double]**, [6 x double]*** %ptr2.addr, align 8 + // CHECK-NEXT: %arrayidx1 = getelementptr inbounds [6 x double]*, [6 x double]** %3, i64 1 + // CHECK-NEXT: %4 = load [6 x double]*, [6 x double]** %arrayidx1, align 8 + // CHECK-NEXT: %arrayidx2 = getelementptr inbounds [6 x double], [6 x double]* %4, i64 2 + // CHECK-NEXT: %5 = bitcast [6 x double]* %arrayidx2 to <6 x double>* + // CHECK-NEXT: %6 = load <6 x double>, <6 x double>* %5, align 8 + // CHECK-NEXT: %matext3 = extractelement <6 x double> %6, i32 3 + // CHECK-NEXT: %add = fadd double %matext, %matext3 + // CHECK-NEXT: %7 = load [6 x double]**, [6 x double]*** %ptr2.addr, align 8 + // CHECK-NEXT: %add.ptr = getelementptr 
inbounds [6 x double]*, [6 x double]** %7, i64 4 + // CHECK-NEXT: %8 = load [6 x double]*, [6 x double]** %add.ptr, align 8 + // CHECK-NEXT: %add.ptr4 = getelementptr inbounds [6 x double], [6 x double]* %8, i64 6 + // CHECK-NEXT: %9 = bitcast [6 x double]* %add.ptr4 to <6 x double>* + // CHECK-NEXT: %10 = load <6 x double>, <6 x double>* %9, align 8 + // CHECK-NEXT: %matext5 = extractelement <6 x double> %10, i32 1 + // CHECK-NEXT: %add6 = fadd double %add, %matext5 + // CHECK-NEXT: ret double %add6 + + return (ptr[0])[2][3] + ptr2[1][2][0][1] + (*(*(ptr2 + 4) + 6))[1][0]; +} +void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) { + // CHECK-LABEL: define void @insert_extract(<25 x double> %a, <9 x float> %b, i64 %j, i16 signext %k) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %j.addr = alloca i64, align 8 + // CHECK-NEXT: %k.addr = alloca i16, align 2 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + // CHECK-NEXT: store i64 %j, i64* %j.addr, align 8 + // CHECK-NEXT: store i16 %k, i16* %k.addr, align 2 + // CHECK-NEXT: %2 = load i64, i64* %j.addr, align 8 + // CHECK-NEXT: %3 = load i16, i16* %k.addr, align 2 + // CHECK-NEXT: %4 = zext i16 %3 to i64 + // CHECK-NEXT: %5 = mul i64 %4, 3 + // CHECK-NEXT: %6 = add i64 %5, %2 + // CHECK-NEXT: %7 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matext = extractelement <9 x float> %7, i64 %6 + // CHECK-NEXT: %conv = fpext float %matext to double + // CHECK-NEXT: %8 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %matins = insertelement <25 x double> %8, double %conv, i32 17 + // CHECK-NEXT: store <25 x double> %matins, <25 x double>* %0, align 8 + 
a[2][3] = b[j][k]; + + // CHECK-NEXT: %9 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matext1 = extractelement <9 x float> %9, i32 3 + // CHECK-NEXT: %10 = load i16, i16* %k.addr, align 2 + // CHECK-NEXT: %11 = load i64, i64* %j.addr, align 8 + // CHECK-NEXT: %12 = zext i16 %10 to i64 + // CHECK-NEXT: %13 = mul i64 %11, 3 + // CHECK-NEXT: %14 = add i64 %13, %12 + // CHECK-NEXT: %15 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matins2 = insertelement <9 x float> %15, float %matext1, i64 %14 + // CHECK-NEXT: store <9 x float> %matins2, <9 x float>* %1, align 4 + b[k][j] = b[0][1]; + + // CHECK-NEXT: %16 = load i16, i16* %k.addr, align 2 + // CHECK-NEXT: %17 = zext i16 %16 to i32 + // CHECK-NEXT: %18 = mul i32 %17, 3 + // CHECK-NEXT: %19 = add i32 %18, 0 + // CHECK-NEXT: %20 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matext3 = extractelement <9 x float> %20, i32 %19 + // CHECK-NEXT: %21 = load i64, i64* %j.addr, align 8 + // CHECK-NEXT: %22 = mul i64 %21, 3 + // CHECK-NEXT: %23 = add i64 %22, 2 + // CHECK-NEXT: %24 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matins4 = insertelement <9 x float> %24, float %matext3, i64 %23 + // CHECK-NEXT: store <9 x float> %matins4, <9 x float>* %1, align 4 + // CHECK-NEXT: ret void + b[2][j] = b[0][k]; +} diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp @@ -0,0 +1,211 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s + +typedef double dx5x5_t __attribute__((matrix_type(5, 5))); +using fx2x3_t = float __attribute__((matrix_type(2, 3))); + +void insert_fp(dx5x5_t *a, double d, fx2x3_t *b, float e) { + (*a)[0u][1u] = d; + (*b)[1u][0u] = e; + + // CHECK-LABEL: @_Z9insert_fpPU11matrix_typeLm5ELm5EddPU11matrix_typeLm2ELm3Eff( + // 
CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double]*, align 8 + // CHECK-NEXT: %d.addr = alloca double, align 8 + // CHECK-NEXT: %b.addr = alloca [6 x float]*, align 8 + // CHECK-NEXT: %e.addr = alloca float, align 4 + // CHECK-NEXT: store [25 x double]* %a, [25 x double]** %a.addr, align 8 + // CHECK-NEXT: store double %d, double* %d.addr, align 8 + // CHECK-NEXT: store [6 x float]* %b, [6 x float]** %b.addr, align 8 + // CHECK-NEXT: store float %e, float* %e.addr, align 4 + // CHECK-NEXT: %0 = load double, double* %d.addr, align 8 + // CHECK-NEXT: %1 = load [25 x double]*, [25 x double]** %a.addr, align 8 + // CHECK-NEXT: %2 = bitcast [25 x double]* %1 to <25 x double>* + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %2, align 8 + // CHECK-NEXT: %matins = insertelement <25 x double> %3, double %0, i32 5 + // CHECK-NEXT: store <25 x double> %matins, <25 x double>* %2, align 8 + // CHECK-NEXT: %4 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %5 = load [6 x float]*, [6 x float]** %b.addr, align 8 + // CHECK-NEXT: %6 = bitcast [6 x float]* %5 to <6 x float>* + // CHECK-NEXT: %7 = load <6 x float>, <6 x float>* %6, align 4 + // CHECK-NEXT: %matins1 = insertelement <6 x float> %7, float %4, i32 1 + // CHECK-NEXT: store <6 x float> %matins1, <6 x float>* %6, align 4 + // CHECK-NEXT: ret void +} + +typedef int ix9x3_t __attribute__((matrix_type(9, 3))); + +void insert_int(ix9x3_t *a, int i) { + (*a)[4u][1u] = i; + + // CHECK-LABEL: @_Z10insert_intPU11matrix_typeLm9ELm3Eii( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [27 x i32]*, align 8 + // CHECK-NEXT: %i.addr = alloca i32, align 4 + // CHECK-NEXT: store [27 x i32]* %a, [27 x i32]** %a.addr, align 8 + // CHECK-NEXT: store i32 %i, i32* %i.addr, align 4 + // CHECK-NEXT: %0 = load i32, i32* %i.addr, align 4 + // CHECK-NEXT: %1 = load [27 x i32]*, [27 x i32]** %a.addr, align 8 + // CHECK-NEXT: %2 = bitcast [27 x i32]* %1 to <27 x i32>* + // CHECK-NEXT: %3 = load <27 x i32>, <27 
x i32>* %2, align 4 + // CHECK-NEXT: %matins = insertelement <27 x i32> %3, i32 %0, i32 13 + // CHECK-NEXT: store <27 x i32> %matins, <27 x i32>* %2, align 4 + // CHECK-NEXT: ret void +} + +template <typename EltTy, unsigned Rows, unsigned Columns> +struct MyMatrix { + using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); + + matrix_t value; +}; + +template <typename EltTy, unsigned Rows, unsigned Columns> +void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e) { + Mat.value[1u][0u] = e; +} + +void test_template(unsigned *Ptr1, unsigned E1, float *Ptr2, float E2) { + + // CHECK-LABEL: define void @_Z13test_templatePjjPff(i32* %Ptr1, i32 %E1, float* %Ptr2, float %E2) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8 + // CHECK-NEXT: %E1.addr = alloca i32, align 4 + // CHECK-NEXT: %Ptr2.addr = alloca float*, align 8 + // CHECK-NEXT: %E2.addr = alloca float, align 4 + // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix, align 4 + // CHECK-NEXT: %Mat2 = alloca %struct.MyMatrix.0, align 4 + // CHECK-NEXT: store i32* %Ptr1, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: store i32 %E1, i32* %E1.addr, align 4 + // CHECK-NEXT: store float* %Ptr2, float** %Ptr2.addr, align 8 + // CHECK-NEXT: store float %E2, float* %E2.addr, align 4 + // CHECK-NEXT: %0 = load i32*, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: %1 = bitcast i32* %0 to [4 x i32]* + // CHECK-NEXT: %2 = bitcast [4 x i32]* %1 to <4 x i32>* + // CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 4 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0 + // CHECK-NEXT: %4 = bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %4, align 4 + // CHECK-NEXT: %5 = load i32, i32* %E1.addr, align 4 + // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat1, i32 %5) + // CHECK-NEXT: %6 = load float*, float** %Ptr2.addr, align 8 + // CHECK-NEXT: %7 = bitcast float* %6 to [24 x float]* + // CHECK-NEXT: %8 = bitcast [24 x float]* %7 to <24 x float>* + // CHECK-NEXT: %9 
= load <24 x float>, <24 x float>* %8, align 4 + // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %Mat2, i32 0, i32 0 + // CHECK-NEXT: %10 = bitcast [24 x float]* %value1 to <24 x float>* + // CHECK-NEXT: store <24 x float> %9, <24 x float>* %10, align 4 + // CHECK-NEXT: %11 = load float, float* %E2.addr, align 4 + // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat2, float %11) + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix* dereferenceable(16) %Mat, i32 %e) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix*, align 8 + // CHECK-NEXT: %e.addr = alloca i32, align 4 + // CHECK-NEXT: store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: store i32 %e, i32* %e.addr, align 4 + // CHECK-NEXT: %0 = load i32, i32* %e.addr, align 4 + // CHECK-NEXT: %1 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %1, i32 0, i32 0 + // CHECK-NEXT: %2 = bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 4 + // CHECK-NEXT: %matins = insertelement <4 x i32> %3, i32 %0, i32 1 + // CHECK-NEXT: store <4 x i32> %matins, <4 x i32>* %2, align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_(%struct.MyMatrix.0* dereferenceable(96) %Mat, float %e) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix.0*, align 8 + // CHECK-NEXT: %e.addr = alloca float, align 4 + // CHECK-NEXT: store %struct.MyMatrix.0* %Mat, %struct.MyMatrix.0** %Mat.addr, align 8 + // CHECK-NEXT: store float %e, float* %e.addr, align 4 + // CHECK-NEXT: %0 = load float, float* %e.addr, align 4 + // CHECK-NEXT: %1 = load %struct.MyMatrix.0*, 
%struct.MyMatrix.0** %Mat.addr, align 8 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.0, %struct.MyMatrix.0* %1, i32 0, i32 0 + // CHECK-NEXT: %2 = bitcast [24 x float]* %value to <24 x float>* + // CHECK-NEXT: %3 = load <24 x float>, <24 x float>* %2, align 4 + // CHECK-NEXT: %matins = insertelement <24 x float> %3, float %0, i32 1 + // CHECK-NEXT: store <24 x float> %matins, <24 x float>* %2, align 4 + // CHECK-NEXT: ret void + + MyMatrix<unsigned, 2, 2> Mat1; + Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); + insert(Mat1, E1); + + MyMatrix<float, 3, 8> Mat2; + Mat2.value = *((decltype(Mat2)::matrix_t *)Ptr2); + insert(Mat2, E2); +} + +typedef float fx3x3_t __attribute__((matrix_type(3, 3))); +void extract1(dx5x5_t a, fx3x3_t b, ix9x3_t c) { + // CHECK-LABEL: @_Z8extract1U11matrix_typeLm5ELm5EdU11matrix_typeLm3ELm3EfU11matrix_typeLm9ELm3Ei( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %c.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %v1 = alloca double, align 8 + // CHECK-NEXT: %v2 = alloca float, align 4 + // CHECK-NEXT: %v3 = alloca i32, align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [9 x float]* %b.addr to <9 x float>* + // CHECK-NEXT: store <9 x float> %b, <9 x float>* %1, align 4 + // CHECK-NEXT: %2 = bitcast [27 x i32]* %c.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %c, <27 x i32>* %2, align 4 + // CHECK-NEXT: %3 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %matext = extractelement <25 x double> %3, i32 17 + // CHECK-NEXT: store double %matext, double* %v1, align 8 + // CHECK-NEXT: %4 = load <9 x float>, <9 x float>* %1, align 4 + // CHECK-NEXT: %matext1 = extractelement <9 x float> %4, i32 5 + // CHECK-NEXT: store float %matext1, float* %v2, align 4 + // CHECK-NEXT: %5 = load <27 x i32>, <27 x 
i32>* %2, align 4 + // CHECK-NEXT: %matext2 = extractelement <27 x i32> %5, i32 10 + // CHECK-NEXT: store i32 %matext2, i32* %v3, align 4 + // CHECK-NEXT: ret void + + double v1 = a[2][3]; + float v2 = b[2][1]; + int v3 = c[1][1]; +} + +template <typename EltTy, unsigned Rows, unsigned Columns> +EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) { + return Mat.value[1u][0u]; +} + +void test_extract_template(unsigned *Ptr1, float *Ptr2) { + // CHECK-LABEL: define void @_Z21test_extract_templatePjPf(i32* %Ptr1, float* %Ptr2) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Ptr1.addr = alloca i32*, align 8 + // CHECK-NEXT: %Ptr2.addr = alloca float*, align 8 + // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix, align 4 + // CHECK-NEXT: %v1 = alloca i32, align 4 + // CHECK-NEXT: store i32* %Ptr1, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: store float* %Ptr2, float** %Ptr2.addr, align 8 + // CHECK-NEXT: %0 = load i32*, i32** %Ptr1.addr, align 8 + // CHECK-NEXT: %1 = bitcast i32* %0 to [4 x i32]* + // CHECK-NEXT: %2 = bitcast [4 x i32]* %1 to <4 x i32>* + // CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 4 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %Mat1, i32 0, i32 0 + // CHECK-NEXT: %4 = bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %4, align 4 + // CHECK-NEXT: %call = call i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat1) + // CHECK-NEXT: store i32 %call, i32* %v1, align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr i32 @_Z7extractIjLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix* dereferenceable(16) %Mat) + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat.addr = alloca %struct.MyMatrix*, align 8 + // CHECK-NEXT: store %struct.MyMatrix* %Mat, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: %0 = load %struct.MyMatrix*, %struct.MyMatrix** %Mat.addr, align 8 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix, %struct.MyMatrix* %0, i32 0, i32 0 + // CHECK-NEXT: %1 = 
bitcast [4 x i32]* %value to <4 x i32>* + // CHECK-NEXT: %2 = load <4 x i32>, <4 x i32>* %1, align 4 + // CHECK-NEXT: %matext = extractelement <4 x i32> %2, i32 1 + // CHECK-NEXT: ret i32 %matext + + MyMatrix<unsigned, 2, 2> Mat1; + Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); + unsigned v1 = extract(Mat1); +} diff --git a/clang/test/Sema/matrix-type-operators.c b/clang/test/Sema/matrix-type-operators.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/matrix-type-operators.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 %s -fenable-matrix -pedantic -verify -triple=x86_64-apple-darwin9 + +typedef float sx5x10_t __attribute__((matrix_type(5, 10))); + +void insert(sx5x10_t a, float f) { + // Non integer indexes. + a[3][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + a[f][9] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + a[f][f] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + a[0][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + + // Invalid element type. + a[3][4] = &f; + // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}} + + // Indexes outside allowed dimensions. 
+ a[-1][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[3][-1] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[3][-1u] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[-1u][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[5][2] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[4][10] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[5][10.0] = f; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + + a[3] = 5.0; + // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} +} + +void extract(sx5x10_t a, float f) { + // Non integer indexes. + float v1 = a[3][f]; + // expected-error@-1 {{matrix column index is not an integer}} + float v2 = a[f][9]; + // expected-error@-1 {{matrix row index is not an integer}} + float v3 = a[f][f]; + // expected-error@-1 {{matrix row index is not an integer}} + + // Invalid element type. + char *v4 = a[3][4]; + // expected-error@-1 {{initializing 'char *' with an expression of incompatible type 'float'}} + + // Indexes outside allowed dimensions. 
+ float v5 = a[-1][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v6 = a[3][-1]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + float v8 = a[-1u][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v9 = a[5][2]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v10 = a[4][10]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + float v11 = a[5][10.0]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + + float v12 = a[3]; + // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} +} diff --git a/clang/test/SemaCXX/matrix-type-operators.cpp b/clang/test/SemaCXX/matrix-type-operators.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaCXX/matrix-type-operators.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 %s -fenable-matrix -pedantic -std=c++11 -verify -triple=x86_64-apple-darwin9 + +typedef float sx5x10_t __attribute__((matrix_type(5, 10))); + +void insert(sx5x10_t a, float f) { + // Non integer indexes. + a[3][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + a[f][9] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + a[f][f] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + a[0][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + + // Invalid element type. + a[3][4] = &f; + // expected-error@-1 {{assigning to 'float' from incompatible type 'float *'; remove &}} + + // Indexes outside allowed dimensions. 
+ a[-1][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[3][-1] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[3][-1u] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[-1u][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[5][2] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + a[4][10] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + a[5][10.0] = f; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} +} + +void extract(sx5x10_t a, float f) { + // Non integer indexes. + float v1 = a[3][f]; + // expected-error@-1 {{matrix column index is not an integer}} + float v2 = a[f][9]; + // expected-error@-1 {{matrix row index is not an integer}} + float v3 = a[f][f]; + // expected-error@-1 {{matrix row index is not an integer}} + + // Invalid element type. + char *v4 = a[3][4]; + // expected-error@-1 {{cannot initialize a variable of type 'char *' with an lvalue of type 'float'}} + + // Indexes outside allowed dimensions. 
+ float v5 = a[-1][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v6 = a[3][-1]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + float v8 = a[-1u][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v9 = a[5][2]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} + float v10 = a[4][10]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 10)}} + float v11 = a[5][10.0]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 5)}} +} + +void incomplete_matrix_index_expr(sx5x10_t a, float f) { + float x = a[3]; + // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} + a[2] = f; + // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} +}