This is an archive of the discontinued LLVM Phabricator instance.

The root cause seems to be that by default, the SystemZ back-end targets a machine without SIMD support, and therefore vector return types are passed via implicit reference according to the ABI:

/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/clang/test/CodeGen/ffp-model.c:121:12: error: CHECK: expected string not found in input
 // CHECK: define{{.*}} <4 x float> @my_m22_muladd
           ^
<stdin>:62:28: note: scanning from here
 %4 = fadd fast <2 x float> %2, %3
                           ^
<stdin>:67:1: note: possible intended match here
define dso_local void @my_m22_muladd(ptr noalias sret([4 x float]) align 4 %agg.result, ptr noundef %0, float noundef nofpclass(nan inf) %y, ptr noundef %1) #0 {
^

thegameg mentioned this in rGc1eacc3c4094: [Matrix] Fix test on SystemZ. (Sep 5 2023, 11:23 AM)

In D158883#4635997, @uweigand wrote:

The newly added test cases in ffp-model.c fail on SystemZ, making CI red:

Should be fixed, thanks for the report and sorry for the delay.

kuhar mentioned this in D159453: [Matrix] Fix test on SystemZ. (Sep 5 2023, 11:33 AM)

In D158883#4638648, @thegameg wrote:

In D158883#4635997, @uweigand wrote:

The newly added test cases in ffp-model.c fail on SystemZ, making CI red:

Should be fixed, thanks for the report and sorry for the delay.

Thanks, this fixes the problem for me locally. (Build bot is unfortunately still down because of this: https://github.com/llvm/llvm-project/pull/65267#issuecomment-1707318337)

Revision Contents

Path

Size

clang/

lib/

CodeGen/

CGExprScalar.cpp

23 lines

test/

CodeGen/

ffp-model.c

112 lines

Diff 555200

clang/lib/CodeGen/CGExprScalar.cpp

Show First 20 Lines • Show All 3,868 Lines • ▼ Show 20 Lines	case LangOptions::SOB_Undefined:
[[fallthrough]];		[[fallthrough]];
case LangOptions::SOB_Trapping:		case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))		if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");		return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
return EmitOverflowCheckedBinOp(op);		return EmitOverflowCheckedBinOp(op);
}		}
}		}

		// For vector and matrix adds, try to fold into a fmuladd.
		if (op.LHS->getType()->isFPOrFPVectorTy()) {
		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
		// Try to form an fmuladd.
		if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
		return FMulAdd;
		}

if (op.Ty->isConstantMatrixType()) {		if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);		llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
return MB.CreateAdd(op.LHS, op.RHS);		return MB.CreateAdd(op.LHS, op.RHS);
}		}

if (op.Ty->isUnsignedIntegerType() &&		if (op.Ty->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&		CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
!CanElideOverflowCheck(CGF.getContext(), op))		!CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);		return EmitOverflowCheckedBinOp(op);

if (op.LHS->getType()->isFPOrFPVectorTy()) {		if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
return FMulAdd;

return Builder.CreateFAdd(op.LHS, op.RHS, "add");		return Builder.CreateFAdd(op.LHS, op.RHS, "add");
}		}

if (op.isFixedPointOp())		if (op.isFixedPointOp())
return EmitFixedPointBinOp(op);		return EmitFixedPointBinOp(op);

return Builder.CreateAdd(op.LHS, op.RHS, "add");		return Builder.CreateAdd(op.LHS, op.RHS, "add");
}		}
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines	if (op.Ty->isSignedIntegerOrEnumerationType()) {
[[fallthrough]];		[[fallthrough]];
case LangOptions::SOB_Trapping:		case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))		if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");		return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
return EmitOverflowCheckedBinOp(op);		return EmitOverflowCheckedBinOp(op);
}		}
}		}

		// For vector and matrix subs, try to fold into a fmuladd.
		if (op.LHS->getType()->isFPOrFPVectorTy()) {
		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
		// Try to form an fmuladd.
		if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
		return FMulAdd;
		}

if (op.Ty->isConstantMatrixType()) {		if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);		llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
return MB.CreateSub(op.LHS, op.RHS);		return MB.CreateSub(op.LHS, op.RHS);
}		}

if (op.Ty->isUnsignedIntegerType() &&		if (op.Ty->isUnsignedIntegerType() &&
CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&		CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
!CanElideOverflowCheck(CGF.getContext(), op))		!CanElideOverflowCheck(CGF.getContext(), op))
return EmitOverflowCheckedBinOp(op);		return EmitOverflowCheckedBinOp(op);

if (op.LHS->getType()->isFPOrFPVectorTy()) {		if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);		CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
// Try to form an fmuladd.
if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
return FMulAdd;
return Builder.CreateFSub(op.LHS, op.RHS, "sub");		return Builder.CreateFSub(op.LHS, op.RHS, "sub");
}		}

if (op.isFixedPointOp())		if (op.isFixedPointOp())
return EmitFixedPointBinOp(op);		return EmitFixedPointBinOp(op);

return Builder.CreateSub(op.LHS, op.RHS, "sub");		return Builder.CreateSub(op.LHS, op.RHS, "sub");
}		}
▲ Show 20 Lines • Show All 1,371 Lines • Show Last 20 Lines

clang/test/CodeGen/ffp-model.c

// REQUIRES: x86-registered-target		// REQUIRES: x86-registered-target
// RUN: %clang -S -emit-llvm -ffp-model=fast -emit-llvm %s -o - \		// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST		// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST

// RUN: %clang -S -emit-llvm -ffp-model=precise %s -o - \		// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise %s -o - \
// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE		// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE

// RUN: %clang -S -emit-llvm -ffp-model=strict %s -o - \		// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict %s -o - \
// RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT		// RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT

// RUN: %clang -S -emit-llvm -ffp-model=strict -ffast-math \		// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict -ffast-math \
// RUN: -target x86_64 %s -o - | FileCheck %s \		// RUN: -target x86_64 %s -o - | FileCheck %s \
// RUN: --check-prefixes CHECK,CHECK-STRICT-FAST		// RUN: --check-prefixes CHECK,CHECK-STRICT-FAST

// RUN: %clang -S -emit-llvm -ffp-model=precise -ffast-math \		// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise -ffast-math \
// RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1		// RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1

float mymuladd(float x, float y, float z) {		float mymuladd(float x, float y, float z) {
// CHECK: define{{.*}} float @mymuladd		// CHECK: define{{.*}} float @mymuladd
return x * y + z;		return x * y + z;

// CHECK-FAST: fmul fast float		// CHECK-FAST: fmul fast float
// CHECK-FAST: load float, ptr		// CHECK-FAST: load float, ptr
Show All 17 Lines	float mymuladd(float x, float y, float z) {
// CHECK-STRICT-FAST: fadd fast float {{.*}}, {{.*}}		// CHECK-STRICT-FAST: fadd fast float {{.*}}, {{.*}}

// CHECK-FAST1: load float, ptr		// CHECK-FAST1: load float, ptr
// CHECK-FAST1: load float, ptr		// CHECK-FAST1: load float, ptr
// CHECK-FAST1: fmul fast float {{.*}}, {{.*}}		// CHECK-FAST1: fmul fast float {{.*}}, {{.*}}
// CHECK-FAST1: load float, ptr {{.*}}		// CHECK-FAST1: load float, ptr {{.*}}
// CHECK-FAST1: fadd fast float {{.*}}, {{.*}}		// CHECK-FAST1: fadd fast float {{.*}}, {{.*}}
}		}

		typedef float __attribute__((ext_vector_type(2))) v2f;

		v2f my_vec_muladd(v2f x, float y, v2f z) {
		// CHECK: define{{.*}} @my_vec_muladd
		return x * y + z;

		// CHECK-FAST: fmul fast <2 x float>
		// CHECK-FAST: load <2 x float>, ptr
		// CHECK-FAST: fadd fast <2 x float>

		// CHECK-PRECISE: load <2 x float>, ptr
		// CHECK-PRECISE: load float, ptr
		// CHECK-PRECISE: load <2 x float>, ptr
		// CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})

		// CHECK-STRICT: load <2 x float>, ptr
		// CHECK-STRICT: load float, ptr
		// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
		// CHECK-STRICT: load <2 x float>, ptr
		// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})

		// CHECK-STRICT-FAST: load <2 x float>, ptr
		// CHECK-STRICT-FAST: load float, ptr
		// CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
		// CHECK-STRICT-FAST: load <2 x float>, ptr
		// CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}

		// CHECK-FAST1: load <2 x float>, ptr
		// CHECK-FAST1: load float, ptr
		// CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
		// CHECK-FAST1: load <2 x float>, ptr {{.*}}
		// CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
		}

		typedef float __attribute__((matrix_type(2, 1))) m21f;

		m21f my_m21_muladd(m21f x, float y, m21f z) {
		// CHECK: define{{.*}} <2 x float> @my_m21_muladd
		return x * y + z;

		// CHECK-FAST: fmul fast <2 x float>
		// CHECK-FAST: load <2 x float>, ptr
		// CHECK-FAST: fadd fast <2 x float>

		// CHECK-PRECISE: load <2 x float>, ptr
		// CHECK-PRECISE: load float, ptr
		// CHECK-PRECISE: load <2 x float>, ptr
		// CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}})

		// CHECK-STRICT: load <2 x float>, ptr
		// CHECK-STRICT: load float, ptr
		// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})
		// CHECK-STRICT: load <2 x float>, ptr
		// CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}})

		// CHECK-STRICT-FAST: load <2 x float>, ptr
		// CHECK-STRICT-FAST: load float, ptr
		// CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}}
		// CHECK-STRICT-FAST: load <2 x float>, ptr
		// CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}}

		// CHECK-FAST1: load <2 x float>, ptr
		// CHECK-FAST1: load float, ptr
		// CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}}
		// CHECK-FAST1: load <2 x float>, ptr {{.*}}
		// CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}}
		}

		typedef float __attribute__((matrix_type(2, 2))) m22f;

		m22f my_m22_muladd(m22f x, float y, m22f z) {
		// CHECK: define{{.*}} <4 x float> @my_m22_muladd
		return x * y + z;

		// CHECK-FAST: fmul fast <4 x float>
		// CHECK-FAST: load <4 x float>, ptr
		// CHECK-FAST: fadd fast <4 x float>

		// CHECK-PRECISE: load <4 x float>, ptr
		// CHECK-PRECISE: load float, ptr
		// CHECK-PRECISE: load <4 x float>, ptr
		// CHECK-PRECISE: call <4 x float> @llvm.fmuladd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}})

		// CHECK-STRICT: load <4 x float>, ptr
		// CHECK-STRICT: load float, ptr
		// CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})
		// CHECK-STRICT: load <4 x float>, ptr
		// CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}})

		// CHECK-STRICT-FAST: load <4 x float>, ptr
		// CHECK-STRICT-FAST: load float, ptr
		// CHECK-STRICT-FAST: fmul fast <4 x float> {{.*}}, {{.*}}
		// CHECK-STRICT-FAST: load <4 x float>, ptr
		// CHECK-STRICT-FAST: fadd fast <4 x float> {{.*}}, {{.*}}

		// CHECK-FAST1: load <4 x float>, ptr
		// CHECK-FAST1: load float, ptr
		// CHECK-FAST1: fmul fast <4 x float> {{.*}}, {{.*}}
		// CHECK-FAST1: load <4 x float>, ptr {{.*}}
		// CHECK-FAST1: fadd fast <4 x float> {{.*}}, {{.*}}
		}

This is an archive of the discontinued LLVM Phabricator instance.

[Matrix] Try to emit fmuladd for both vector and matrix types (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 555200

clang/lib/CodeGen/CGExprScalar.cpp

clang/test/CodeGen/ffp-model.c

[Matrix] Try to emit fmuladd for both vector and matrix types
ClosedPublic