Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -3874,6 +3874,14 @@ } } + // For vector and matrix adds, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. + if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -3887,10 +3895,6 @@ if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. - if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) - return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); } @@ -4024,6 +4028,14 @@ } } + // For vector and matrix subs, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. + if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -4037,9 +4049,6 @@ if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. - if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) - return FMulAdd; return Builder.CreateFSub(op.LHS, op.RHS, "sub"); } Index: clang/test/CodeGen/ffp-model.c =================================================================== --- clang/test/CodeGen/ffp-model.c +++ clang/test/CodeGen/ffp-model.c @@ -1,18 +1,18 @@ // REQUIRES: x86-registered-target -// RUN: %clang -S -emit-llvm -ffp-model=fast -emit-llvm %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \ // RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST -// RUN: %clang -S -emit-llvm -ffp-model=precise %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise %s -o - \ // RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE -// RUN: %clang -S -emit-llvm -ffp-model=strict %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict %s -o - \ // RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT -// RUN: %clang -S -emit-llvm -ffp-model=strict -ffast-math \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict -ffast-math \ // RUN: -target x86_64 %s -o - | FileCheck %s \ // RUN: --check-prefixes CHECK,CHECK-STRICT-FAST -// RUN: %clang -S -emit-llvm -ffp-model=precise -ffast-math \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise -ffast-math \ // RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1 float mymuladd(float x, float y, float z) { @@ -46,3 +46,105 @@ // CHECK-FAST1: load float, ptr {{.*}} // CHECK-FAST1: fadd fast float {{.*}}, {{.*}} } + +typedef float __attribute__((ext_vector_type(2))) v2f; + +v2f my_vec_muladd(v2f x, float y, v2f z) { + // CHECK: define{{.*}} @my_vec_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <2 x float> + // CHECK-FAST: load <2 x float>, ptr + // CHECK-FAST: fadd fast <2 x float> + + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}}) + + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <2 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <2 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}} +} + +typedef float __attribute__((matrix_type(2, 1))) m21f; + +m21f my_m21_muladd(m21f x, float y, m21f z) { + // CHECK: define{{.*}} <2 x float> @my_m21_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <2 x float> + // CHECK-FAST: load <2 x float>, ptr + // CHECK-FAST: fadd fast <2 x float> + + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}}) + + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <2 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <2 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}} +} + +typedef float __attribute__((matrix_type(2, 2))) m22f; + +m22f my_m22_muladd(m22f x, float y, m22f z) { + // CHECK: define{{.*}} <4 x float> @my_m22_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <4 x float> + // CHECK-FAST: load <4 x float>, ptr + // CHECK-FAST: fadd fast <4 x float> + + // CHECK-PRECISE: load <4 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <4 x float>, ptr + // CHECK-PRECISE: call <4 x float> @llvm.fmuladd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}) + + // CHECK-STRICT: load <4 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <4 x float>, ptr + // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <4 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <4 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <4 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <4 x float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <4 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <4 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <4 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <4 x float> {{.*}}, {{.*}} +}