diff --git a/clang/docs/MatrixTypes.rst b/clang/docs/MatrixTypes.rst --- a/clang/docs/MatrixTypes.rst +++ b/clang/docs/MatrixTypes.rst @@ -122,9 +122,11 @@ operator provides matrix multiplication, while ``+`` and ``-`` are performed element-wise. There are also scalar versions of the operators, which take a matrix type and the matrix element type. The operation is applied to all -elements of the matrix using the scalar value. +elements of the matrix using the scalar value. In addition, the ``/`` operator +is only supported for expression ``M1 / S1``, where ``M1`` is of matrix type +and ``S1`` is of a real type. -For ``BIN_OP`` in ``+``, ``-``, ``*`` given the expression ``M1 BIN_OP M2`` where +For ``BIN_OP`` in ``+``, ``-``, ``*``, ``/`` given the expression ``M1 BIN_OP M2`` where at least one of ``M1`` or ``M2`` is of matrix type and, for `*`, the other is of a real type: diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -257,6 +257,7 @@ EXTENSION(gnu_asm, LangOpts.GNUAsm) EXTENSION(gnu_asm_goto_with_outputs, LangOpts.GNUAsm) EXTENSION(matrix_types, LangOpts.MatrixTypes) +EXTENSION(matrix_types_scalar_division, LangOpts.MatrixTypesScalarDiv) FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -395,6 +395,8 @@ LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") +LANGOPT(MatrixTypesScalarDiv, 1, 1, "Enable or disable scalar division support " + "for matrix types") COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0") diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2086,8 +2086,9 @@ bool Type::hasUnsignedIntegerRepresentation() const { if (const auto *VT = dyn_cast(CanonicalType)) return VT->getElementType()->isUnsignedIntegerOrEnumerationType(); - else - return isUnsignedIntegerOrEnumerationType(); + if (const auto *VT = dyn_cast(CanonicalType)) + return VT->getElementType()->isUnsignedIntegerOrEnumerationType(); + return isUnsignedIntegerOrEnumerationType(); } bool Type::isFloatingType() const { diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3157,6 +3157,20 @@ } } + if (Ops.Ty->isConstantMatrixType()) { + llvm::MatrixBuilder MB(Builder); + // We need to check the types of the operands of the operator to get the + // correct matrix dimensions. + auto *BO = cast(Ops.E); + assert( + isa(BO->getLHS()->getType().getCanonicalType()) && + "first operand must be a matrix"); + assert(BO->getRHS()->getType().getCanonicalType()->isArithmeticType() && + "second operand must be an arithmetic type"); + return MB.CreateScalarDiv(Ops.LHS, Ops.RHS, + Ops.Ty->hasUnsignedIntegerRepresentation()); + } + if (Ops.LHS->getType()->isFPOrFPVectorTy()) { llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -10199,14 +10199,20 @@ bool IsCompAssign, bool IsDiv) { checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); - if (LHS.get()->getType()->isVectorType() || - RHS.get()->getType()->isVectorType()) + QualType LHSTy = LHS.get()->getType(); + QualType RHSTy = RHS.get()->getType(); + if (LHSTy->isVectorType() || RHSTy->isVectorType()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/false); - if (!IsDiv && (LHS.get()->getType()->isConstantMatrixType() || - RHS.get()->getType()->isConstantMatrixType())) + if (!IsDiv && + (LHSTy->isConstantMatrixType() || RHSTy->isConstantMatrixType())) return CheckMatrixMultiplyOperands(LHS, RHS, Loc, IsCompAssign); + // For division, only matrix-by-scalar is supported. Other combinations with + // matrix types are invalid. + if (IsDiv && LHSTy->isConstantMatrixType() && RHSTy->isArithmeticType() && + getLangOpts().MatrixTypesScalarDiv) + return CheckMatrixElementwiseOperands(LHS, RHS, Loc, IsCompAssign); QualType compType = UsualArithmeticConversions( LHS, RHS, Loc, IsCompAssign ? ACK_CompAssign : ACK_Arithmetic); diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -729,6 +729,102 @@ a *= 5; } +// CHECK-LABEL: @divide_double_matrix_scalar_float( +// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 +// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4 +// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] +// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 +// CHECK-NEXT: ret void +// +void divide_double_matrix_scalar_float(dx5x5_t a, float s) { + a = a / s; +} + +// CHECK-LABEL: @divide_double_matrix_scalar_double( +// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8 +// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8 +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] +// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8 +// CHECK-NEXT: ret void +// +void divide_double_matrix_scalar_double(dx5x5_t a, double s) { + a = a / s; +} + +// CHECK-LABEL: @divide_float_matrix_scalar_double( +// CHECK: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4 +// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8 +// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]] +// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: ret void +// +void divide_float_matrix_scalar_double(fx2x3_t b, double s) { + b = b / s; +} + +// CHECK-LABEL: @divide_int_matrix_scalar_short( +// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 +// CHECK-NEXT: [[S:%.*]] = load i16, i16* %s.addr, align 2 +// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] +// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: ret void +// +void divide_int_matrix_scalar_short(ix9x3_t b, short s) { + b = b / s; +} + +// CHECK-LABEL: @divide_int_matrix_scalar_ull( +// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4 +// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8 +// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] +// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: ret void +// +void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { + b = b / s; +} + +// CHECK-LABEL: @divide_ull_matrix_scalar_ull( +// CHECK: [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8 +// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8 +// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0 +// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]] +// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8 +// CHECK-NEXT: ret void +// +void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) { + b = b / s; +} + +// CHECK-LABEL: @divide_float_matrix_constant( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 +// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>* +// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], +// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4 +// CHECK-NEXT: ret void +// +void divide_float_matrix_constant(fx2x3_t a) { + a = a / 2.5; +} + // Tests for the matrix type operators. typedef double dx5x5_t __attribute__((matrix_type(5, 5))); diff --git a/clang/test/CodeGen/matrix-type.c b/clang/test/CodeGen/matrix-type.c --- a/clang/test/CodeGen/matrix-type.c +++ b/clang/test/CodeGen/matrix-type.c @@ -4,6 +4,10 @@ #error Expected extension 'matrix_types' to be enabled #endif +#if !__has_extension(matrix_types_scalar_division) +#error Expected extension 'matrix_types_scalar_division' to be enabled +#endif + typedef double dx5x5_t __attribute__((matrix_type(5, 5))); // CHECK: %struct.Matrix = type { i8, [12 x float], float } diff --git a/clang/test/Sema/matrix-type-operators.c b/clang/test/Sema/matrix-type-operators.c --- a/clang/test/Sema/matrix-type-operators.c +++ b/clang/test/Sema/matrix-type-operators.c @@ -94,6 +94,40 @@ // expected-error@-1 {{assigning to 'float' from incompatible type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))')}} } +void mat_scalar_divide(sx10x10_t a, sx5x10_t b, float sf, char *p) { + // Shape of multiplication result does not match the type of b. + b = a / sf; + // expected-error@-1 {{assigning to 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') from incompatible type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))')}} + b = sf / a; + // expected-error@-1 {{invalid operands to binary expression ('float' and 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))'))}} + + a = a / p; + // expected-error@-1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'char *')}} + a = p / a; + // expected-error@-1 {{invalid operands to binary expression ('char *' and 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))'))}} + + sf = a / sf; + // expected-error@-1 {{assigning to 'float' from incompatible type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))')}} +} + +void matrix_matrix_divide(sx10x10_t a, sx5x10_t b, ix10x5_t c, ix10x10_t d, float sf, char *p) { + // Matrix by matrix division is not supported. + a = a / a; + // expected-error@-1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx10x10_t')}} + + b = a / a; + // expected-error@-1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx10x10_t')}} + + // Check element type mismatches. + a = b / c; + // expected-error@-1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'ix10x5_t' (aka 'int __attribute__((matrix_type(10, 5)))'))}} + d = a / a; + // expected-error@-1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx10x10_t')}} + + p = a / a; + // expected-error@-1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx10x10_t')}} +} + sx5x10_t get_matrix(); void insert(sx5x10_t a, float f) { diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -215,6 +215,22 @@ return B.CreateMul(LHS, RHS); } + /// Divide matrix \p LHS by scalar \p RHS. If the operands are integers, \p + /// IsUnsigned indicates whether UDiv or SDiv should be used. + Value *CreateScalarDiv(Value *LHS, Value *RHS, bool IsUnsigned) { + assert(LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()); + assert(!isa(LHS->getType()) && + "LHS Assumed to be fixed width"); + RHS = + B.CreateVectorSplat(cast(LHS->getType())->getElementCount(), + RHS, "scalar.splat"); + return cast(LHS->getType()) + ->getElementType() + ->isFloatingPointTy() + ? B.CreateFDiv(LHS, RHS) + : (IsUnsigned ? B.CreateUDiv(LHS, RHS) : B.CreateSDiv(LHS, RHS)); + } + /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix. Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx, unsigned NumRows, Twine const &Name = "") {