diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11139,6 +11139,11 @@ QualType CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc); + /// Type checking for matrix binary operators. + QualType CheckMatrixElementwiseOperands(ExprResult &LHS, ExprResult &RHS, + SourceLocation Loc, + bool IsCompAssign); + bool areLaxCompatibleVectorTypes(QualType srcType, QualType destType); bool isLaxVectorConversion(QualType srcType, QualType destType); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/Module.h" #include @@ -3469,6 +3470,11 @@ } } + if (op.Ty->isMatrixType()) { + llvm::MatrixBuilder MB(Builder); + return MB.CreateAdd(op.LHS, op.RHS); + } + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && !CanElideOverflowCheck(CGF.getContext(), op)) @@ -3614,6 +3620,11 @@ } } + if (op.Ty->isMatrixType()) { + llvm::MatrixBuilder MB(Builder); + return MB.CreateSub(op.LHS, op.RHS); + } + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && !CanElideOverflowCheck(CGF.getContext(), op)) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9960,6 +9960,11 @@ return compType; } + if (LHS.get()->getType()->isMatrixType() || + RHS.get()->getType()->isMatrixType()) { + return CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy); + } + QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) @@ -10055,6 +10060,11 @@ return compType; } + if (LHS.get()->getType()->isMatrixType() || + RHS.get()->getType()->isMatrixType()) { + return CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy); + } + QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic); if (LHS.isInvalid() || RHS.isInvalid()) @@ -11646,6 +11656,70 @@ return GetSignedVectorType(LHS.get()->getType()); } +static bool tryConvertScalarToMatrixElementTy(Sema &S, QualType ElementType, + ExprResult *Scalar) { + QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType(); + if (!ScalarTy->isArithmeticType()) + return false; + + CastKind ScalarCast = CK_NoOp; + if (ElementType->isIntegralType(S.Context) && + ScalarTy->isIntegralType(S.Context)) + ScalarCast = CK_IntegralCast; + else if (ElementType->isRealFloatingType() && ScalarTy->isRealFloatingType()) + ScalarCast = CK_FloatingCast; + else if (ElementType->isRealFloatingType() && + ScalarTy->isIntegralType(S.Context)) + ScalarCast = CK_IntegralToFloating; + else + return false; + + *Scalar = S.ImpCastExprToType(Scalar->get(), ElementType, ScalarCast); + + return true; +} + +QualType Sema::CheckMatrixElementwiseOperands(ExprResult &LHS, ExprResult &RHS, + SourceLocation Loc, + bool IsCompAssign) { + if (!IsCompAssign) { + LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); + if (LHS.isInvalid()) + return QualType(); + } + RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); + if (RHS.isInvalid()) + return QualType(); + + // For conversion purposes, we ignore any qualifiers. + // For example, "const float" and "float" are equivalent. + QualType LHSType = LHS.get()->getType().getUnqualifiedType(); + QualType RHSType = RHS.get()->getType().getUnqualifiedType(); + + const MatrixType *LHSMatType = LHSType->getAs(); + const MatrixType *RHSMatType = RHSType->getAs(); + assert((LHSMatType || RHSMatType) && "At least one operand must be a matrix"); + + if (Context.hasSameType(LHSType, RHSType)) + return LHSType; + + if (LHSMatType && !RHSMatType) { + if (tryConvertScalarToMatrixElementTy(*this, LHSMatType->getElementType(), + &RHS)) + return LHSType; + return InvalidOperands(Loc, LHS, RHS); + } + + if (!LHSMatType && RHSMatType) { + if (tryConvertScalarToMatrixElementTy(*this, RHSMatType->getElementType(), + &LHS)) + return RHSType; + return InvalidOperands(Loc, LHS, RHS); + } + + return InvalidOperands(Loc, LHS, RHS); +} + inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -155,3 +155,311 @@ // CHECK-NEXT: store i32 %matext2, i32* %v3, align 4 // CHECK-NEXT: ret void } + +void add_matrix_matrix(dx5x5_t a, dx5x5_t b, dx5x5_t c, ix9x3_t ai, ix9x3_t bi, ix9x3_t ci) { + a = b + c; + ai = bi + ci; + + // CHECK-LABEL: @add_matrix_matrix( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %c.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %ai.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %bi.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %ci.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [25 x double]* %b.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %b, <25 x double>* %1, align 8 + // CHECK-NEXT: %2 = bitcast [25 x double]* %c.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %c, <25 x double>* %2, align 8 + // CHECK-NEXT: %3 = bitcast [27 x i32]* %ai.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %ai, <27 x i32>* %3, align 4 + // CHECK-NEXT: %4 = bitcast [27 x i32]* %bi.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %bi, <27 x i32>* %4, align 4 + // CHECK-NEXT: %5 = bitcast [27 x i32]* %ci.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %ci, <27 x i32>* %5, align 4 + // CHECK-NEXT: %6 = load <25 x double>, <25 x double>* %1, align 8 + // CHECK-NEXT: %7 = load <25 x double>, <25 x double>* %2, align 8 + // CHECK-NEXT: %8 = fadd <25 x double> %6, %7 + // CHECK-NEXT: store <25 x double> %8, <25 x double>* %0, align 8 + // CHECK-NEXT: %9 = load <27 x i32>, <27 x i32>* %4, align 4 + // CHECK-NEXT: %10 = load <27 x i32>, <27 x i32>* %5, align 4 + // CHECK-NEXT: %11 = add <27 x i32> %9, %10 + // CHECK-NEXT: store <27 x i32> %11, <27 x i32>* %3, align 4 + // CHECK-NEXT: ret void +} + +void add_matrix_scalar_float(dx5x5_t a, fx2x3_t b, float vf, double vd) { + a = a + vf; + a = a + vd; + + // CHECK-LABEL: define void @add_matrix_scalar_float(<25 x double> %a, <6 x float> %b, float %vf, double %vd) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [6 x float], align 4 + // CHECK-NEXT: %vf.addr = alloca float, align 4 + // CHECK-NEXT: %vd.addr = alloca double, align 8 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [6 x float]* %b.addr to <6 x float>* + // CHECK-NEXT: store <6 x float> %b, <6 x float>* %1, align 4 + // CHECK-NEXT: store float %vf, float* %vf.addr, align 4 + // CHECK-NEXT: store double %vd, double* %vd.addr, align 8 + // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %3 = load float, float* %vf.addr, align 4 + // CHECK-NEXT: %conv = fpext float %3 to double + // CHECK-NEXT: %scalar.splat.splatinsert = insertelement <25 x double> undef, double %conv, i32 0 + // CHECK-NEXT: %scalar.splat.splat = shufflevector <25 x double> %scalar.splat.splatinsert, <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: %4 = fadd <25 x double> %2, %scalar.splat.splat + // CHECK-NEXT: store <25 x double> %4, <25 x double>* %0, align 8 + // CHECK-NEXT: %5 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %6 = load double, double* %vd.addr, align 8 + // CHECK-NEXT: %scalar.splat.splatinsert1 = insertelement <25 x double> undef, double %6, i32 0 + // CHECK-NEXT: %scalar.splat.splat2 = shufflevector <25 x double> %scalar.splat.splatinsert1, <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: %7 = fadd <25 x double> %5, %scalar.splat.splat2 + // CHECK-NEXT: store <25 x double> %7, <25 x double>* %0, align 8 + + b = b + vf; + b = b + vd; + + // CHECK-NEXT: %8 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %9 = load float, float* %vf.addr, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert3 = insertelement <6 x float> undef, float %9, i32 0 + // CHECK-NEXT: %scalar.splat.splat4 = shufflevector <6 x float> %scalar.splat.splatinsert3, <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: %10 = fadd <6 x float> %8, %scalar.splat.splat4 + // CHECK-NEXT: store <6 x float> %10, <6 x float>* %1, align 4 + // CHECK-NEXT: %11 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %12 = load double, double* %vd.addr, align 8 + // CHECK-NEXT: %conv5 = fptrunc double %12 to float + // CHECK-NEXT: %scalar.splat.splatinsert6 = insertelement <6 x float> undef, float %conv5, i32 0 + // CHECK-NEXT: %scalar.splat.splat7 = shufflevector <6 x float> %scalar.splat.splatinsert6, <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: %13 = fadd <6 x float> %11, %scalar.splat.splat7 + // CHECK-NEXT: store <6 x float> %13, <6 x float>* %1, align 4 + // CHECK-NEXT: ret void +} + +typedef int llix9x3_t __attribute__((matrix_type(9, 3))); + +void add_matrix_scalar_ints(ix9x3_t a, llix9x3_t b, short vs, long int vli, unsigned long long int vulli) { + a = a + vs; + a = a + vli; + a = a + vulli; + + // CHECK-LABEL: define void @add_matrix_scalar_ints(<27 x i32> %a, <27 x i32> %b, i16 signext %vs, i64 %vli, i64 %vulli) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %b.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %vs.addr = alloca i16, align 2 + // CHECK-NEXT: %vli.addr = alloca i64, align 8 + // CHECK-NEXT: %vulli.addr = alloca i64, align 8 + // CHECK-NEXT: %0 = bitcast [27 x i32]* %a.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %a, <27 x i32>* %0, align 4 + // CHECK-NEXT: %1 = bitcast [27 x i32]* %b.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %b, <27 x i32>* %1, align 4 + // CHECK-NEXT: store i16 %vs, i16* %vs.addr, align 2 + // CHECK-NEXT: store i64 %vli, i64* %vli.addr, align 8 + // CHECK-NEXT: store i64 %vulli, i64* %vulli.addr, align 8 + // CHECK-NEXT: %2 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %3 = load i16, i16* %vs.addr, align 2 + // CHECK-NEXT: %conv = sext i16 %3 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert = insertelement <27 x i32> undef, i32 %conv, i32 0 + // CHECK-NEXT: %scalar.splat.splat = shufflevector <27 x i32> %scalar.splat.splatinsert, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %4 = add <27 x i32> %2, %scalar.splat.splat + // CHECK-NEXT: store <27 x i32> %4, <27 x i32>* %0, align 4 + // CHECK-NEXT: %5 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %6 = load i64, i64* %vli.addr, align 8 + // CHECK-NEXT: %conv1 = trunc i64 %6 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert2 = insertelement <27 x i32> undef, i32 %conv1, i32 0 + // CHECK-NEXT: %scalar.splat.splat3 = shufflevector <27 x i32> %scalar.splat.splatinsert2, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %7 = add <27 x i32> %5, %scalar.splat.splat3 + // CHECK-NEXT: store <27 x i32> %7, <27 x i32>* %0, align 4 + // CHECK-NEXT: %8 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %9 = load i64, i64* %vulli.addr, align 8 + // CHECK-NEXT: %conv4 = trunc i64 %9 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert5 = insertelement <27 x i32> undef, i32 %conv4, i32 0 + // CHECK-NEXT: %scalar.splat.splat6 = shufflevector <27 x i32> %scalar.splat.splatinsert5, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %10 = add <27 x i32> %8, %scalar.splat.splat6 + // CHECK-NEXT: store <27 x i32> %10, <27 x i32>* %0, align 4 + // CHECK-NEXT: %11 = load i16, i16* %vs.addr, align 2 + + b = vs + b; + b = vli + b; + b = vulli + b; + + // CHECK-NEXT: %conv7 = sext i16 %11 to i32 + // CHECK-NEXT: %12 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert8 = insertelement <27 x i32> undef, i32 %conv7, i32 0 + // CHECK-NEXT: %scalar.splat.splat9 = shufflevector <27 x i32> %scalar.splat.splatinsert8, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %13 = add <27 x i32> %scalar.splat.splat9, %12 + // CHECK-NEXT: store <27 x i32> %13, <27 x i32>* %1, align 4 + // CHECK-NEXT: %14 = load i64, i64* %vli.addr, align 8 + // CHECK-NEXT: %conv10 = trunc i64 %14 to i32 + // CHECK-NEXT: %15 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert11 = insertelement <27 x i32> undef, i32 %conv10, i32 0 + // CHECK-NEXT: %scalar.splat.splat12 = shufflevector <27 x i32> %scalar.splat.splatinsert11, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %16 = add <27 x i32> %scalar.splat.splat12, %15 + // CHECK-NEXT: store <27 x i32> %16, <27 x i32>* %1, align 4 + // CHECK-NEXT: %17 = load i64, i64* %vulli.addr, align 8 + // CHECK-NEXT: %conv13 = trunc i64 %17 to i32 + // CHECK-NEXT: %18 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert14 = insertelement <27 x i32> undef, i32 %conv13, i32 0 + // CHECK-NEXT: %scalar.splat.splat15 = shufflevector <27 x i32> %scalar.splat.splatinsert14, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %19 = add <27 x i32> %scalar.splat.splat15, %18 + // CHECK-NEXT: store <27 x i32> %19, <27 x i32>* %1, align 4 + // CHECK-NEXT: ret void +} + +void sub_matrix_matrix(dx5x5_t a, dx5x5_t b, dx5x5_t c, ix9x3_t ai, ix9x3_t bi, ix9x3_t ci) { + a = b - c; + ai = bi - ci; + + // CHECK-LABEL: @sub_matrix_matrix( + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %c.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %ai.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %bi.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %ci.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [25 x double]* %b.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %b, <25 x double>* %1, align 8 + // CHECK-NEXT: %2 = bitcast [25 x double]* %c.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %c, <25 x double>* %2, align 8 + // CHECK-NEXT: %3 = bitcast [27 x i32]* %ai.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %ai, <27 x i32>* %3, align 4 + // CHECK-NEXT: %4 = bitcast [27 x i32]* %bi.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %bi, <27 x i32>* %4, align 4 + // CHECK-NEXT: %5 = bitcast [27 x i32]* %ci.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %ci, <27 x i32>* %5, align 4 + // CHECK-NEXT: %6 = load <25 x double>, <25 x double>* %1, align 8 + // CHECK-NEXT: %7 = load <25 x double>, <25 x double>* %2, align 8 + // CHECK-NEXT: %8 = fsub <25 x double> %6, %7 + // CHECK-NEXT: store <25 x double> %8, <25 x double>* %0, align 8 + // CHECK-NEXT: %9 = load <27 x i32>, <27 x i32>* %4, align 4 + // CHECK-NEXT: %10 = load <27 x i32>, <27 x i32>* %5, align 4 + // CHECK-NEXT: %11 = sub <27 x i32> %9, %10 + // CHECK-NEXT: store <27 x i32> %11, <27 x i32>* %3, align 4 + // CHECK-NEXT: ret void +} + +void sub_matrix_scalar_float(dx5x5_t a, fx2x3_t b, float vf, double vd) { + a = a - vf; + a = a - vd; + + // CHECK-LABEL: define void @sub_matrix_scalar_float(<25 x double> %a, <6 x float> %b, float %vf, double %vd) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [25 x double], align 8 + // CHECK-NEXT: %b.addr = alloca [6 x float], align 4 + // CHECK-NEXT: %vf.addr = alloca float, align 4 + // CHECK-NEXT: %vd.addr = alloca double, align 8 + // CHECK-NEXT: %0 = bitcast [25 x double]* %a.addr to <25 x double>* + // CHECK-NEXT: store <25 x double> %a, <25 x double>* %0, align 8 + // CHECK-NEXT: %1 = bitcast [6 x float]* %b.addr to <6 x float>* + // CHECK-NEXT: store <6 x float> %b, <6 x float>* %1, align 4 + // CHECK-NEXT: store float %vf, float* %vf.addr, align 4 + // CHECK-NEXT: store double %vd, double* %vd.addr, align 8 + // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %3 = load float, float* %vf.addr, align 4 + // CHECK-NEXT: %conv = fpext float %3 to double + // CHECK-NEXT: %scalar.splat.splatinsert = insertelement <25 x double> undef, double %conv, i32 0 + // CHECK-NEXT: %scalar.splat.splat = shufflevector <25 x double> %scalar.splat.splatinsert, <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: %4 = fsub <25 x double> %2, %scalar.splat.splat + // CHECK-NEXT: store <25 x double> %4, <25 x double>* %0, align 8 + // CHECK-NEXT: %5 = load <25 x double>, <25 x double>* %0, align 8 + // CHECK-NEXT: %6 = load double, double* %vd.addr, align 8 + // CHECK-NEXT: %scalar.splat.splatinsert1 = insertelement <25 x double> undef, double %6, i32 0 + // CHECK-NEXT: %scalar.splat.splat2 = shufflevector <25 x double> %scalar.splat.splatinsert1, <25 x double> undef, <25 x i32> zeroinitializer + // CHECK-NEXT: %7 = fsub <25 x double> %5, %scalar.splat.splat2 + // CHECK-NEXT: store <25 x double> %7, <25 x double>* %0, align 8 + + b = b - vf; + b = b - vd; + + // CHECK-NEXT: %8 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %9 = load float, float* %vf.addr, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert3 = insertelement <6 x float> undef, float %9, i32 0 + // CHECK-NEXT: %scalar.splat.splat4 = shufflevector <6 x float> %scalar.splat.splatinsert3, <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: %10 = fsub <6 x float> %8, %scalar.splat.splat4 + // CHECK-NEXT: store <6 x float> %10, <6 x float>* %1, align 4 + // CHECK-NEXT: %11 = load <6 x float>, <6 x float>* %1, align 4 + // CHECK-NEXT: %12 = load double, double* %vd.addr, align 8 + // CHECK-NEXT: %conv5 = fptrunc double %12 to float + // CHECK-NEXT: %scalar.splat.splatinsert6 = insertelement <6 x float> undef, float %conv5, i32 0 + // CHECK-NEXT: %scalar.splat.splat7 = shufflevector <6 x float> %scalar.splat.splatinsert6, <6 x float> undef, <6 x i32> zeroinitializer + // CHECK-NEXT: %13 = fsub <6 x float> %11, %scalar.splat.splat7 + // CHECK-NEXT: store <6 x float> %13, <6 x float>* %1, align 4 + // CHECK-NEXT: ret void +} + +void sub_matrix_scalar_ints(ix9x3_t a, llix9x3_t b, short vs, long int vli, unsigned long long int vulli) { + a = a - vs; + a = a - vli; + a = a - vulli; + + // CHECK-LABEL: define void @sub_matrix_scalar_ints(<27 x i32> %a, <27 x i32> %b, i16 signext %vs, i64 %vli, i64 %vulli) + // CHECK-NEXT: entry: + // CHECK-NEXT: %a.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %b.addr = alloca [27 x i32], align 4 + // CHECK-NEXT: %vs.addr = alloca i16, align 2 + // CHECK-NEXT: %vli.addr = alloca i64, align 8 + // CHECK-NEXT: %vulli.addr = alloca i64, align 8 + // CHECK-NEXT: %0 = bitcast [27 x i32]* %a.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %a, <27 x i32>* %0, align 4 + // CHECK-NEXT: %1 = bitcast [27 x i32]* %b.addr to <27 x i32>* + // CHECK-NEXT: store <27 x i32> %b, <27 x i32>* %1, align 4 + // CHECK-NEXT: store i16 %vs, i16* %vs.addr, align 2 + // CHECK-NEXT: store i64 %vli, i64* %vli.addr, align 8 + // CHECK-NEXT: store i64 %vulli, i64* %vulli.addr, align 8 + // CHECK-NEXT: %2 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %3 = load i16, i16* %vs.addr, align 2 + // CHECK-NEXT: %conv = sext i16 %3 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert = insertelement <27 x i32> undef, i32 %conv, i32 0 + // CHECK-NEXT: %scalar.splat.splat = shufflevector <27 x i32> %scalar.splat.splatinsert, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %4 = sub <27 x i32> %2, %scalar.splat.splat + // CHECK-NEXT: store <27 x i32> %4, <27 x i32>* %0, align 4 + // CHECK-NEXT: %5 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %6 = load i64, i64* %vli.addr, align 8 + // CHECK-NEXT: %conv1 = trunc i64 %6 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert2 = insertelement <27 x i32> undef, i32 %conv1, i32 0 + // CHECK-NEXT: %scalar.splat.splat3 = shufflevector <27 x i32> %scalar.splat.splatinsert2, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %7 = sub <27 x i32> %5, %scalar.splat.splat3 + // CHECK-NEXT: store <27 x i32> %7, <27 x i32>* %0, align 4 + // CHECK-NEXT: %8 = load <27 x i32>, <27 x i32>* %0, align 4 + // CHECK-NEXT: %9 = load i64, i64* %vulli.addr, align 8 + // CHECK-NEXT: %conv4 = trunc i64 %9 to i32 + // CHECK-NEXT: %scalar.splat.splatinsert5 = insertelement <27 x i32> undef, i32 %conv4, i32 0 + // CHECK-NEXT: %scalar.splat.splat6 = shufflevector <27 x i32> %scalar.splat.splatinsert5, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %10 = sub <27 x i32> %8, %scalar.splat.splat6 + // CHECK-NEXT: store <27 x i32> %10, <27 x i32>* %0, align 4 + + b = vs - b; + b = vli - b; + b = vulli - b; + + // CHECK-NEXT: %11 = load i16, i16* %vs.addr, align 2 + // CHECK-NEXT: %conv7 = sext i16 %11 to i32 + // CHECK-NEXT: %12 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert8 = insertelement <27 x i32> undef, i32 %conv7, i32 0 + // CHECK-NEXT: %scalar.splat.splat9 = shufflevector <27 x i32> %scalar.splat.splatinsert8, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %13 = sub <27 x i32> %scalar.splat.splat9, %12 + // CHECK-NEXT: store <27 x i32> %13, <27 x i32>* %1, align 4 + // CHECK-NEXT: %14 = load i64, i64* %vli.addr, align 8 + // CHECK-NEXT: %conv10 = trunc i64 %14 to i32 + // CHECK-NEXT: %15 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert11 = insertelement <27 x i32> undef, i32 %conv10, i32 0 + // CHECK-NEXT: %scalar.splat.splat12 = shufflevector <27 x i32> %scalar.splat.splatinsert11, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %16 = sub <27 x i32> %scalar.splat.splat12, %15 + // CHECK-NEXT: store <27 x i32> %16, <27 x i32>* %1, align 4 + // CHECK-NEXT: %17 = load i64, i64* %vulli.addr, align 8 + // CHECK-NEXT: %conv13 = trunc i64 %17 to i32 + // CHECK-NEXT: %18 = load <27 x i32>, <27 x i32>* %1, align 4 + // CHECK-NEXT: %scalar.splat.splatinsert14 = insertelement <27 x i32> undef, i32 %conv13, i32 0 + // CHECK-NEXT: %scalar.splat.splat15 = shufflevector <27 x i32> %scalar.splat.splatinsert14, <27 x i32> undef, <27 x i32> zeroinitializer + // CHECK-NEXT: %19 = sub <27 x i32> %scalar.splat.splat15, %18 + // CHECK-NEXT: store <27 x i32> %19, <27 x i32>* %1, align 4 + // CHECK-NEXT: ret void +} diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp --- a/clang/test/CodeGenCXX/matrix-type-operators.cpp +++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp @@ -209,3 +209,79 @@ Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); unsigned v1 = extract(Mat1); } + +template +typename MyMatrix::matrix_t add(MyMatrix &A, MyMatrix &B) { + return A.value + B.value; +} + +void test_add_template() { + // CHECK-LABEL: define void @_Z17test_add_templatev() + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix.1, align 4 + // CHECK-NEXT: %Mat2 = alloca %struct.MyMatrix.1, align 4 + // CHECK-NEXT: %call = call <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix.1* dereferenceable(40) %Mat1, %struct.MyMatrix.1* dereferenceable(40) %Mat2) + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %Mat1, i32 0, i32 0 + // CHECK-NEXT: %0 = bitcast [10 x float]* %value to <10 x float>* + // CHECK-NEXT: store <10 x float> %call, <10 x float>* %0, align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix.1* dereferenceable(40) %A, %struct.MyMatrix.1* dereferenceable(40) %B) + // CHECK-NEXT: entry: + // CHECK-NEXT: %A.addr = alloca %struct.MyMatrix.1*, align 8 + // CHECK-NEXT: %B.addr = alloca %struct.MyMatrix.1*, align 8 + // CHECK-NEXT: store %struct.MyMatrix.1* %A, %struct.MyMatrix.1** %A.addr, align 8 + // CHECK-NEXT: store %struct.MyMatrix.1* %B, %struct.MyMatrix.1** %B.addr, align 8 + // CHECK-NEXT: %0 = load %struct.MyMatrix.1*, %struct.MyMatrix.1** %A.addr, align 8 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %0, i32 0, i32 0 + // CHECK-NEXT: %1 = bitcast [10 x float]* %value to <10 x float>* + // CHECK-NEXT: %2 = load <10 x float>, <10 x float>* %1, align 4 + // CHECK-NEXT: %3 = load %struct.MyMatrix.1*, %struct.MyMatrix.1** %B.addr, align 8 + // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %3, i32 0, i32 0 + // CHECK-NEXT: %4 = bitcast [10 x float]* %value1 to <10 x float>* + // CHECK-NEXT: %5 = load <10 x float>, <10 x float>* %4, align 4 + // CHECK-NEXT: %6 = fadd <10 x float> %2, %5 + // CHECK-NEXT: ret <10 x float> %6 + + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = add(Mat1, Mat2); +} + +template +typename MyMatrix::matrix_t subtract(MyMatrix &A, MyMatrix &B) { + return A.value - B.value; +} + +void test_subtract_template() { + // CHECK-LABEL: define void @_Z22test_subtract_templatev() + // CHECK-NEXT: entry: + // CHECK-NEXT: %Mat1 = alloca %struct.MyMatrix.1, align 4 + // CHECK-NEXT: %Mat2 = alloca %struct.MyMatrix.1, align 4 + // CHECK-NEXT: %call = call <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix.1* dereferenceable(40) %Mat1, %struct.MyMatrix.1* dereferenceable(40) %Mat2) + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %Mat1, i32 0, i32 0 + // CHECK-NEXT: %0 = bitcast [10 x float]* %value to <10 x float>* + // CHECK-NEXT: store <10 x float> %call, <10 x float>* %0, align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define linkonce_odr <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix.1* dereferenceable(40) %A, %struct.MyMatrix.1* dereferenceable(40) %B) + // CHECK-NEXT: entry: + // CHECK-NEXT: %A.addr = alloca %struct.MyMatrix.1*, align 8 + // CHECK-NEXT: %B.addr = alloca %struct.MyMatrix.1*, align 8 + // CHECK-NEXT: store %struct.MyMatrix.1* %A, %struct.MyMatrix.1** %A.addr, align 8 + // CHECK-NEXT: store %struct.MyMatrix.1* %B, %struct.MyMatrix.1** %B.addr, align 8 + // CHECK-NEXT: %0 = load %struct.MyMatrix.1*, %struct.MyMatrix.1** %A.addr, align 8 + // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %0, i32 0, i32 0 + // CHECK-NEXT: %1 = bitcast [10 x float]* %value to <10 x float>* + // CHECK-NEXT: %2 = load <10 x float>, <10 x float>* %1, align 4 + // CHECK-NEXT: %3 = load %struct.MyMatrix.1*, %struct.MyMatrix.1** %B.addr, align 8 + // CHECK-NEXT: %value1 = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %3, i32 0, i32 0 + // CHECK-NEXT: %4 = bitcast [10 x float]* %value1 to <10 x float>* + // CHECK-NEXT: %5 = load <10 x float>, <10 x float>* %4, align 4 + // CHECK-NEXT: %6 = fsub <10 x float> %2, %5 + // CHECK-NEXT: ret <10 x float> %6 + + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = subtract(Mat1, Mat2); +} diff --git a/clang/test/Sema/matrix-type-operators.c b/clang/test/Sema/matrix-type-operators.c --- a/clang/test/Sema/matrix-type-operators.c +++ b/clang/test/Sema/matrix-type-operators.c @@ -65,3 +65,32 @@ float v12 = a[3]; // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} } + +typedef float sx10x5_t __attribute__((matrix_type(10, 5))); +typedef float sx10x10_t __attribute__((matrix_type(10, 10))); + +void add(sx10x10_t a, sx5x10_t b, sx10x5_t c) { + a = b + c; + // expected-error@-1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}} + + a = b + b; // expected-error {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}} + + a = 10 + b; + // expected-error@-1 {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}} + + a = b + &c; + // expected-error@-1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float __attribute__((matrix_type(10, 5)))*'))}} +} + +void sub(sx10x10_t a, sx5x10_t b, sx10x5_t c) { + a = b - c; + // expected-error@-1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}} + + a = b - b; // expected-error {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}} + + a = 10 - b; + // expected-error@-1 {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}} + + a = b - &c; + // expected-error@-1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float __attribute__((matrix_type(10, 5)))*'))}} +} diff --git a/clang/test/SemaCXX/matrix-type-operators.cpp b/clang/test/SemaCXX/matrix-type-operators.cpp --- a/clang/test/SemaCXX/matrix-type-operators.cpp +++ b/clang/test/SemaCXX/matrix-type-operators.cpp @@ -66,3 +66,66 @@ a[2] = f; // expected-error@-1 {{single subscript expressions are not allowed for matrix values}} } + +template +struct MyMatrix { + using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns))); + + matrix_t value; +}; + +template +typename MyMatrix::matrix_t add(MyMatrix &A, MyMatrix &B) { + char *v1 = A.value + B.value; + // expected-error@-1 {{cannot initialize a variable of type 'char *' with an rvalue of type 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))')}} + // expected-error@-2 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))') and 'MyMatrix::matrix_t' (aka 'float __attribute__((matrix_type(2, 2)))'))}} + // expected-error@-3 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))') and 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))'))}} + + return A.value + B.value; + // expected-error@-1 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))') and 'MyMatrix::matrix_t' (aka 'float __attribute__((matrix_type(2, 2)))'))}} + // expected-error@-2 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))') and 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))'))}} +} + +void test_add_template(unsigned *Ptr1, float *Ptr2) { + MyMatrix Mat1; + MyMatrix Mat2; + MyMatrix Mat3; + Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); + unsigned v1 = add(Mat1, Mat1); + // expected-error@-1 {{cannot initialize a variable of type 'unsigned int' with an rvalue of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))')}} + // expected-note@-2 {{in instantiation of function template specialization 'add' requested here}} + + Mat1.value = add(Mat1, Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'add' requested here}} + + Mat1.value = add(Mat2, Mat3); + // expected-note@-1 {{in instantiation of function template specialization 'add' requested here}} +} + +template +typename MyMatrix::matrix_t subtract(MyMatrix &A, MyMatrix &B) { + char *v1 = A.value - B.value; + // expected-error@-1 {{cannot initialize a variable of type 'char *' with an rvalue of type 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))')}} + // expected-error@-2 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))') and 'MyMatrix::matrix_t' (aka 'float __attribute__((matrix_type(2, 2)))')}} + // expected-error@-3 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))') and 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))')}} + + return A.value - B.value; + // expected-error@-1 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))') and 'MyMatrix::matrix_t' (aka 'float __attribute__((matrix_type(2, 2)))')}} + // expected-error@-2 {{invalid operands to binary expression ('MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))') and 'MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(3, 3)))')}} +} + +void test_subtract_template(unsigned *Ptr1, float *Ptr2) { + MyMatrix Mat1; + MyMatrix Mat2; + MyMatrix Mat3; + Mat1.value = *((decltype(Mat1)::matrix_t *)Ptr1); + unsigned v1 = subtract(Mat1, Mat1); + // expected-error@-1 {{cannot initialize a variable of type 'unsigned int' with an rvalue of type 'typename MyMatrix::matrix_t' (aka 'unsigned int __attribute__((matrix_type(2, 2)))')}} + // expected-note@-2 {{in instantiation of function template specialization 'subtract' requested here}} + + Mat1.value = subtract(Mat1, Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'subtract' requested here}} + + Mat1.value = subtract(Mat2, Mat3); + // expected-note@-1 {{in instantiation of function template specialization 'subtract' requested here}} +} diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -127,6 +127,16 @@ /// Add matrixes \p LHS and \p RHS. Support both integer and floating point /// matrixes. Value *CreateAdd(Value *LHS, Value *RHS) { + assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()); + if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) + RHS = B.CreateVectorSplat( + cast(LHS->getType())->getNumElements(), RHS, + "scalar.splat"); + else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) + LHS = B.CreateVectorSplat( + cast(RHS->getType())->getNumElements(), LHS, + "scalar.splat"); + return cast(LHS->getType()) ->getElementType() ->isFloatingPointTy() @@ -137,6 +147,16 @@ /// Subtract matrixes \p LHS and \p RHS. Support both integer and floating /// point matrixes. Value *CreateSub(Value *LHS, Value *RHS) { + assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()); + if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) + RHS = B.CreateVectorSplat( + cast(LHS->getType())->getNumElements(), RHS, + "scalar.splat"); + else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) + LHS = B.CreateVectorSplat( + cast(RHS->getType())->getNumElements(), LHS, + "scalar.splat"); + return cast(LHS->getType()) ->getElementType() ->isFloatingPointTy()