Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -629,6 +629,9 @@ return false; } + /// Returns true if the target supports FMA instructions; false by default. + virtual bool hasFMA() const { return false; } + // Different targets may support a different maximum width for the _BitInt // type, depending on what operations are supported. virtual size_t getMaxBitIntWidth() const { Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -419,6 +419,8 @@ return getPointerWidthV(AddrSpace); } + /// Reports the HasFMA flag. + bool hasFMA() const override { return HasFMA; } }; // X86-32 generic target Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -3770,7 +3770,8 @@ // Check whether it would be legal to emit an fmuladd intrinsic call to // represent op and if so, build the fmuladd. // -// Checks that (a) the operation is fusable, and (b) -ffp-contract=on. +// Proceeds if (a) the operation is fusable (-ffp-contract=on), or (b) the +// target has FMA. NOTE(review): (b) passes even with contraction off; confirm. // Does NOT check the type of the operation - it's assumed that this function // will be called from contexts where it's known that the type is contractable. static Value* tryEmitFMulAdd(const BinOpInfo &op, @@ -3782,7 +3783,9 @@ "Only fadd/fsub can be the root of an fmuladd."); // Check whether this op is marked as fusable. 
- if (!op.FPFeatures.allowFPContractWithinStatement()) + const bool allowFPContract = op.FPFeatures.allowFPContractWithinStatement(); + const bool allowFMA = CGF.getContext().getTargetInfo().hasFMA(); + if (!allowFPContract && !allowFMA) return nullptr; Value *LHS = op.LHS; Index: clang/test/CodeGen/X86/fexcess-precision-avx.c =================================================================== --- /dev/null +++ clang/test/CodeGen/X86/fexcess-precision-avx.c @@ -0,0 +1,313 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast 
-target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -ffp-contract=on -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno 
-ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ +// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ +// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ +// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s + +// CHECK-EXT-LABEL: define dso_local half @f +// CHECK-EXT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-NEXT: entry: +// CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-EXT-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] +// CHECK-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float +// CHECK-EXT-NEXT: 
[[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT3:%.*]] = fpext half [[TMP3]] to float +// CHECK-EXT-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]] +// CHECK-EXT-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]] +// CHECK-EXT-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half +// CHECK-EXT-NEXT: ret half [[UNPROMOTION]] +// CHECK-NO-EXT-LABEL: define dso_local half @f +// CHECK-NO-EXT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NO-EXT-NEXT: entry: +// CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] +// CHECK-NO-EXT-NEXT: [[TMP4:%.*]] = call half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) +// CHECK-NO-EXT-NEXT: ret half [[TMP4]] +// +// CHECK-EXT-DBL-LABEL: define dso_local half @f +// CHECK-EXT-DBL-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-DBL-NEXT: entry: +// CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half, 
align 2 +// CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to double +// CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to double +// CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double +// CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double +// CHECK-EXT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-EXT-DBL-NEXT: [[TMP4:%.*]] = call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]]) +// CHECK-EXT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to half +// CHECK-EXT-DBL-NEXT: ret half [[CONV5]] +// +// CHECK-EXT-FP80-LABEL: define dso_local half @f +// CHECK-EXT-FP80-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-FP80-NEXT: entry: +// CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: 
[[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-EXT-FP80-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], x86_fp80 [[MUL4]]) +// CHECK-EXT-FP80-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to half +// CHECK-EXT-FP80-NEXT: ret half [[CONV5]] +// +// CHECK-CONTRACT-LABEL: define dso_local half @f +// CHECK-CONTRACT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-NEXT: entry: +// CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], 
[[TMP3]] +// CHECK-CONTRACT-NEXT: [[TMP4:%.*]] = call half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) +// CHECK-CONTRACT-NEXT: ret half [[TMP4]] +// +// CHECK-CONTRACT-DBL-LABEL: define dso_local half @f +// CHECK-CONTRACT-DBL-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-DBL-NEXT: entry: +// CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double +// CHECK-CONTRACT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP4:%.*]] = call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]]) +// CHECK-CONTRACT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to half +// CHECK-CONTRACT-DBL-NEXT: ret half [[CONV5]] +// +// CHECK-CONTRACT-EXT-LABEL: define dso_local half @f +// CHECK-CONTRACT-EXT-SAME: (half 
noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-EXT-NEXT: entry: +// CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], x86_fp80 [[MUL4]]) +// CHECK-CONTRACT-EXT-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to half +// CHECK-CONTRACT-EXT-NEXT: ret half [[CONV5]] +// +// CHECK-UNSAFE-LABEL: define dso_local half @f +// CHECK-UNSAFE-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-UNSAFE-NEXT: entry: +// CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca half, 
align 2 +// CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[MUL1:%.*]] = fmul reassoc nsz arcp afn half [[TMP2]], [[TMP3]] +// CHECK-UNSAFE-NEXT: [[TMP4:%.*]] = call reassoc nsz arcp afn half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) +// CHECK-UNSAFE-NEXT: ret half [[TMP4]] +// +_Float16 f(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + return a * b + c * d; +} + +// CHECK-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-SAME: () #[[ATTR0]] { +// CHECK-EXT-NEXT: entry: +// CHECK-EXT-NEXT: ret i32 0 +// CHECK-NO-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-NO-EXT-SAME: () #[[ATTR0]] { +// CHECK-NO-EXT-NEXT: entry: +// CHECK-NO-EXT-NEXT: ret i32 0 +// +// CHECK-EXT-DBL-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-DBL-SAME: () #[[ATTR0]] { +// CHECK-EXT-DBL-NEXT: entry: +// CHECK-EXT-DBL-NEXT: ret i32 1 +// +// CHECK-EXT-FP80-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-FP80-SAME: () #[[ATTR0]] { +// CHECK-EXT-FP80-NEXT: entry: +// CHECK-EXT-FP80-NEXT: ret i32 2 +// +// CHECK-CONTRACT-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-SAME: () #[[ATTR0]] { +// CHECK-CONTRACT-NEXT: entry: +// CHECK-CONTRACT-NEXT: ret i32 0 +// +// CHECK-CONTRACT-DBL-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-DBL-SAME: () #[[ATTR0]] { +// CHECK-CONTRACT-DBL-NEXT: entry: +// 
CHECK-CONTRACT-DBL-NEXT: ret i32 1 +// +// CHECK-CONTRACT-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-EXT-SAME: () #[[ATTR0]] { +// CHECK-CONTRACT-EXT-NEXT: entry: +// CHECK-CONTRACT-EXT-NEXT: ret i32 2 +// +// CHECK-UNSAFE-LABEL: define dso_local i32 @getFEM +// CHECK-UNSAFE-SAME: () #[[ATTR0]] { +// CHECK-UNSAFE-NEXT: entry: +// CHECK-UNSAFE-NEXT: ret i32 0 +// +int getFEM() { + return __FLT_EVAL_METHOD__; +} Index: clang/test/CodeGen/X86/fexcess-precision.c =================================================================== --- clang/test/CodeGen/X86/fexcess-precision.c +++ clang/test/CodeGen/X86/fexcess-precision.c @@ -1,164 +1,84 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=fast -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ -// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=standard -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ -// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=fast \ // RUN: -emit-llvm 
-ffp-eval-method=source -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=standard \ // RUN: -emit-llvm -ffp-eval-method=source -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -emit-llvm -ffp-eval-method=source -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=fast \ // RUN: -emit-llvm -ffp-eval-method=double -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=standard \ // RUN: -emit-llvm -ffp-eval-method=double -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=double -o - 
%s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -emit-llvm -ffp-eval-method=double -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=fast \ // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=fast -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=standard \ // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=standard -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -ffp-contract=on -emit-llvm -o - %s \ // RUN: | FileCheck 
-check-prefixes=CHECK-CONTRACT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -ffp-contract=on -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ // RUN: -ffp-eval-method=source -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ -// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ // RUN: -ffp-eval-method=double -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ -// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -ffloat16-excess-precision=none \ // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ // RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ -// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s - // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ 
// RUN: -ffloat16-excess-precision=none \ // RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ @@ -166,182 +86,183 @@ // RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ -// RUN: -ffloat16-excess-precision=none -target-feature +avx512fp16 \ -// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ -// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ -// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s - -// CHECK-EXT-LABEL: @f( +// CHECK-EXT-LABEL: define dso_local half @f +// CHECK-EXT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-EXT-NEXT: entry: -// CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 // CHECK-EXT-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float -// CHECK-EXT-NEXT: [[TMP1:%.*]] = load half, ptr 
[[B_ADDR]] +// CHECK-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-EXT-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float // CHECK-EXT-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] -// CHECK-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 // CHECK-EXT-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float -// CHECK-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-EXT-NEXT: [[EXT3:%.*]] = fpext half [[TMP3]] to float // CHECK-EXT-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]] // CHECK-EXT-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]] // CHECK-EXT-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half // CHECK-EXT-NEXT: ret half [[UNPROMOTION]] // -// CHECK-NO-EXT-LABEL: @f( +// CHECK-NO-EXT-LABEL: define dso_local half @f +// CHECK-NO-EXT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NO-EXT-NEXT: entry: -// CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-NO-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-NO-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-NO-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-NO-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] -// CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-NO-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store 
half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-NO-EXT-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] -// CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] -// CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-NO-EXT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] // CHECK-NO-EXT-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]] // CHECK-NO-EXT-NEXT: ret half [[ADD]] // -// CHECK-EXT-DBL-LABEL: @f( +// CHECK-EXT-DBL-LABEL: define dso_local half @f +// CHECK-EXT-DBL-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-EXT-DBL-NEXT: entry: -// CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-EXT-DBL-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-EXT-DBL-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-EXT-DBL-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-EXT-DBL-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-DBL-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: 
store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 // CHECK-EXT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to double -// CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-EXT-DBL-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to double // CHECK-EXT-DBL-NEXT: [[MUL:%.*]] = fmul double [[CONV]], [[CONV1]] -// CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 // CHECK-EXT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double -// CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-EXT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double // CHECK-EXT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] // CHECK-EXT-DBL-NEXT: [[ADD:%.*]] = fadd double [[MUL]], [[MUL4]] // CHECK-EXT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD]] to half // CHECK-EXT-DBL-NEXT: ret half [[CONV5]] // -// CHECK-EXT-FP80-LABEL: @f( +// CHECK-EXT-FP80-LABEL: define dso_local half @f +// CHECK-EXT-FP80-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-EXT-FP80-NEXT: entry: -// CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-EXT-FP80-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-EXT-FP80-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-EXT-FP80-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-EXT-FP80-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-EXT-FP80-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// 
CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-EXT-FP80-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 // CHECK-EXT-FP80-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 -// CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-EXT-FP80-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 // CHECK-EXT-FP80-NEXT: [[MUL:%.*]] = fmul x86_fp80 [[CONV]], [[CONV1]] -// CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 // CHECK-EXT-FP80-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 -// CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-EXT-FP80-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 // CHECK-EXT-FP80-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] // CHECK-EXT-FP80-NEXT: [[ADD:%.*]] = fadd x86_fp80 [[MUL]], [[MUL4]] // CHECK-EXT-FP80-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[ADD]] to half // CHECK-EXT-FP80-NEXT: ret half [[CONV5]] // -// CHECK-CONTRACT-LABEL: @f( +// CHECK-CONTRACT-LABEL: define dso_local half @f +// CHECK-CONTRACT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-CONTRACT-NEXT: entry: -// CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca half 
-// CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-CONTRACT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-CONTRACT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-CONTRACT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] -// CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] -// CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] -// CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-CONTRACT-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] // CHECK-CONTRACT-NEXT: [[TMP4:%.*]] = call half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) // CHECK-CONTRACT-NEXT: ret half [[TMP4]] // -// CHECK-CONTRACT-DBL-LABEL: @f( +// CHECK-CONTRACT-DBL-LABEL: define dso_local half @f +// CHECK-CONTRACT-DBL-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-CONTRACT-DBL-NEXT: entry: -// CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half -// 
CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-DBL-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-CONTRACT-DBL-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-CONTRACT-DBL-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-CONTRACT-DBL-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 // CHECK-CONTRACT-DBL-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to double -// CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-CONTRACT-DBL-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to double -// CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 // CHECK-CONTRACT-DBL-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to double -// CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-CONTRACT-DBL-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to double // CHECK-CONTRACT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] // CHECK-CONTRACT-DBL-NEXT: [[TMP4:%.*]] = 
call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]]) // CHECK-CONTRACT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to half // CHECK-CONTRACT-DBL-NEXT: ret half [[CONV5]] // -// CHECK-CONTRACT-EXT-LABEL: @f( +// CHECK-CONTRACT-EXT-LABEL: define dso_local half @f +// CHECK-CONTRACT-EXT-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-CONTRACT-EXT-NEXT: entry: -// CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-CONTRACT-EXT-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-CONTRACT-EXT-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-CONTRACT-EXT-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-CONTRACT-EXT-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 // CHECK-CONTRACT-EXT-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to x86_fp80 -// CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 // CHECK-CONTRACT-EXT-NEXT: [[CONV1:%.*]] = fpext half [[TMP1]] to x86_fp80 -// 
CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 // CHECK-CONTRACT-EXT-NEXT: [[CONV2:%.*]] = fpext half [[TMP2]] to x86_fp80 -// CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-CONTRACT-EXT-NEXT: [[CONV3:%.*]] = fpext half [[TMP3]] to x86_fp80 // CHECK-CONTRACT-EXT-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] // CHECK-CONTRACT-EXT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], x86_fp80 [[MUL4]]) // CHECK-CONTRACT-EXT-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to half // CHECK-CONTRACT-EXT-NEXT: ret half [[CONV5]] // -// CHECK-UNSAFE-LABEL: @f( +// CHECK-UNSAFE-LABEL: define dso_local half @f +// CHECK-UNSAFE-SAME: (half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]], half noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-UNSAFE-NEXT: entry: -// CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca half -// CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca half -// CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca half -// CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca half -// CHECK-UNSAFE-NEXT: store half [[A:%.*]], ptr [[A_ADDR]] -// CHECK-UNSAFE-NEXT: store half [[B:%.*]], ptr [[B_ADDR]] -// CHECK-UNSAFE-NEXT: store half [[C:%.*]], ptr [[C_ADDR]] -// CHECK-UNSAFE-NEXT: store half [[D:%.*]], ptr [[D_ADDR]] -// CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]] -// CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]] -// CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]] -// CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]] +// CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 +// 
CHECK-UNSAFE-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[B]], ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[C]], ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store half [[D]], ptr [[D_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 // CHECK-UNSAFE-NEXT: [[MUL1:%.*]] = fmul reassoc nsz arcp afn half [[TMP2]], [[TMP3]] // CHECK-UNSAFE-NEXT: [[TMP4:%.*]] = call reassoc nsz arcp afn half @llvm.fmuladd.f16(half [[TMP0]], half [[TMP1]], half [[MUL1]]) // CHECK-UNSAFE-NEXT: ret half [[TMP4]] @@ -350,35 +271,43 @@ return a * b + c * d; } -// CHECK-EXT-LABEL: @getFEM( +// CHECK-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-SAME: () #[[ATTR0]] { // CHECK-EXT-NEXT: entry: // CHECK-EXT-NEXT: ret i32 0 // -// CHECK-NO-EXT-LABEL: @getFEM( +// CHECK-NO-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-NO-EXT-SAME: () #[[ATTR0]] { // CHECK-NO-EXT-NEXT: entry: // CHECK-NO-EXT-NEXT: ret i32 0 // -// CHECK-EXT-DBL-LABEL: @getFEM( +// CHECK-EXT-DBL-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-DBL-SAME: () #[[ATTR0]] { // CHECK-EXT-DBL-NEXT: entry: // CHECK-EXT-DBL-NEXT: ret i32 1 // -// CHECK-EXT-FP80-LABEL: @getFEM( +// CHECK-EXT-FP80-LABEL: define dso_local i32 @getFEM +// CHECK-EXT-FP80-SAME: () #[[ATTR0]] { // CHECK-EXT-FP80-NEXT: entry: // CHECK-EXT-FP80-NEXT: ret i32 2 // -// CHECK-CONTRACT-LABEL: @getFEM( +// CHECK-CONTRACT-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-SAME: () #[[ATTR0]] { // CHECK-CONTRACT-NEXT: entry: // CHECK-CONTRACT-NEXT: ret i32 0 // -// CHECK-CONTRACT-DBL-LABEL: @getFEM( +// CHECK-CONTRACT-DBL-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-DBL-SAME: () #[[ATTR0]] { // CHECK-CONTRACT-DBL-NEXT: entry: // 
CHECK-CONTRACT-DBL-NEXT: ret i32 1 // -// CHECK-CONTRACT-EXT-LABEL: @getFEM( +// CHECK-CONTRACT-EXT-LABEL: define dso_local i32 @getFEM +// CHECK-CONTRACT-EXT-SAME: () #[[ATTR0]] { // CHECK-CONTRACT-EXT-NEXT: entry: // CHECK-CONTRACT-EXT-NEXT: ret i32 2 // -// CHECK-UNSAFE-LABEL: @getFEM( +// CHECK-UNSAFE-LABEL: define dso_local i32 @getFEM +// CHECK-UNSAFE-SAME: () #[[ATTR0]] { // CHECK-UNSAFE-NEXT: entry: // CHECK-UNSAFE-NEXT: ret i32 0 // Index: clang/test/CodeGen/X86/fma-intrinsics.c =================================================================== --- /dev/null +++ clang/test/CodeGen/X86/fma-intrinsics.c @@ -0,0 +1,38 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu nehalem -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-FMA %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu skylake -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-FMA %s + +// CHECK-NO-FMA-LABEL: define dso_local float @testFma +// CHECK-NO-FMA-SAME: (float noundef [[ADD:%.*]], float noundef [[MUL:%.*]], float noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NO-FMA-NEXT: entry: +// CHECK-NO-FMA-NEXT: [[ADD_ADDR:%.*]] = alloca float, align 4 +// CHECK-NO-FMA-NEXT: [[MUL_ADDR:%.*]] = alloca float, align 4 +// CHECK-NO-FMA-NEXT: [[NUM_ADDR:%.*]] = alloca float, align 4 +// CHECK-NO-FMA-NEXT: store float [[ADD]], ptr [[ADD_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: store float [[MUL]], ptr [[MUL_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: store float [[NUM]], ptr [[NUM_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: [[TMP0:%.*]] = load float, ptr [[ADD_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: [[TMP1:%.*]] = load float, ptr [[NUM_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: [[TMP2:%.*]] = load float, ptr [[MUL_ADDR]], align 4 +// CHECK-NO-FMA-NEXT: [[MUL1:%.*]] = fmul float [[TMP1]], [[TMP2]] +// CHECK-NO-FMA-NEXT: [[ADD2:%.*]] = fadd float [[TMP0]], [[MUL1]] +// CHECK-NO-FMA-NEXT: ret 
float [[ADD2]] +// +// CHECK-FMA-LABEL: define dso_local float @testFma +// CHECK-FMA-SAME: (float noundef [[ADD:%.*]], float noundef [[MUL:%.*]], float noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-FMA-NEXT: entry: +// CHECK-FMA-NEXT: [[ADD_ADDR:%.*]] = alloca float, align 4 +// CHECK-FMA-NEXT: [[MUL_ADDR:%.*]] = alloca float, align 4 +// CHECK-FMA-NEXT: [[NUM_ADDR:%.*]] = alloca float, align 4 +// CHECK-FMA-NEXT: store float [[ADD]], ptr [[ADD_ADDR]], align 4 +// CHECK-FMA-NEXT: store float [[MUL]], ptr [[MUL_ADDR]], align 4 +// CHECK-FMA-NEXT: store float [[NUM]], ptr [[NUM_ADDR]], align 4 +// CHECK-FMA-NEXT: [[TMP0:%.*]] = load float, ptr [[ADD_ADDR]], align 4 +// CHECK-FMA-NEXT: [[TMP1:%.*]] = load float, ptr [[NUM_ADDR]], align 4 +// CHECK-FMA-NEXT: [[TMP2:%.*]] = load float, ptr [[MUL_ADDR]], align 4 +// CHECK-FMA-NEXT: [[TMP3:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float [[TMP2]], float [[TMP0]]) +// CHECK-FMA-NEXT: ret float [[TMP3]] +// +float testFma(float add, float mul, float num) { /* Single multiply-add expression under test: the nehalem run expects a separate fmul followed by fadd, the skylake run expects one llvm.fmuladd.f32 call. Keep the operand order as written; the autogenerated assertions above pin the resulting load order. */ + return add + num*mul; +}