diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -732,6 +732,7 @@
           BO->getLHS()->getType().getCanonicalType());
       auto *RHSMatTy = dyn_cast<ConstantMatrixType>(
           BO->getRHS()->getType().getCanonicalType());
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
       if (LHSMatTy && RHSMatTy)
         return MB.CreateMatrixMultiply(Ops.LHS, Ops.RHS, LHSMatTy->getNumRows(),
                                        LHSMatTy->getNumColumns(),
@@ -3206,6 +3207,7 @@
         "first operand must be a matrix");
     assert(BO->getRHS()->getType().getCanonicalType()->isArithmeticType() &&
            "second operand must be an arithmetic type");
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
     return MB.CreateScalarDiv(Ops.LHS, Ops.RHS,
                               Ops.Ty->hasUnsignedIntegerRepresentation());
   }
@@ -3585,6 +3587,7 @@
 
   if (op.Ty->isConstantMatrixType()) {
     llvm::MatrixBuilder MB(Builder);
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
     return MB.CreateAdd(op.LHS, op.RHS);
   }
 
@@ -3734,6 +3737,7 @@
 
   if (op.Ty->isConstantMatrixType()) {
     llvm::MatrixBuilder MB(Builder);
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
     return MB.CreateSub(op.LHS, op.RHS);
   }
 
diff --git a/clang/test/CodeGen/fp-matrix-pragma.c b/clang/test/CodeGen/fp-matrix-pragma.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/fp-matrix-pragma.c
@@ -0,0 +1,30 @@
+// RUN: %clang -emit-llvm -S -fenable-matrix --target=arm64-unknown-iphoneos -g0 -mllvm -disable-llvm-optzns %s -o - | FileCheck %s
+
+// Verify that fast-math flags requested via '#pragma clang fp' are attached to
+// floating-point matrix operations, and that integer matrix operations carry
+// no such flags.
+
+typedef float fx2x2_t __attribute__((matrix_type(2, 2)));
+typedef int ix2x2_t __attribute__((matrix_type(2, 2)));
+
+fx2x2_t fp_matrix_contract(fx2x2_t a, fx2x2_t b, float c, float d) {
+// CHECK: call contract <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32
+// CHECK: fdiv contract <4 x float>
+// CHECK: fmul contract <4 x float>
+#pragma clang fp contract(fast)
+  return (a * b / c) * d;
+}
+
+fx2x2_t fp_matrix_reassoc(fx2x2_t a, fx2x2_t b, fx2x2_t c) {
+// CHECK: fadd reassoc <4 x float>
+// CHECK: fsub reassoc <4 x float>
+#pragma clang fp reassociate(on)
+  return a + b - c;
+}
+
+ix2x2_t int_matrix_ops(ix2x2_t a, ix2x2_t b, ix2x2_t c) {
+// CHECK: call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32
+// CHECK: add <4 x i32>
+#pragma clang fp contract(fast) reassociate(on)
+  return a * b + c;
+}