Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1827,10 +1827,32 @@ Module.getLangOpts().Sanitize.has(SanitizerKind::Return); } -void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, - bool HasOptnone, - bool AttrOnCallSite, - llvm::AttrBuilder &FuncAttrs) { +/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the +/// requested denormal behavior, accounting for the overriding behavior of the +/// -f32 case. +static void addDenormalModeAttrs(llvm::DenormalMode FPDenormalMode, + llvm::DenormalMode FP32DenormalMode, + llvm::AttrBuilder &FuncAttrs) { + if (FPDenormalMode != llvm::DenormalMode::getDefault()) + FuncAttrs.addAttribute("denormal-fp-math", FPDenormalMode.str()); + + if (FP32DenormalMode != FPDenormalMode && FP32DenormalMode.isValid()) + FuncAttrs.addAttribute("denormal-fp-math-f32", FP32DenormalMode.str()); +} + +/// Add default attributes to a function, which have merge semantics under +/// -mlink-builtin-bitcode and should not simply overwrite any existing +/// attributes in the linked library. +static void +addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts, + llvm::AttrBuilder &FuncAttrs) { + addDenormalModeAttrs(CodeGenOpts.FPDenormalMode, CodeGenOpts.FP32DenormalMode, + FuncAttrs); +} + +void CodeGenModule::getTrivialDefaultFunctionAttributes( + StringRef Name, bool HasOptnone, bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. if (!HasOptnone) { if (CodeGenOpts.OptimizeSize) @@ -1872,15 +1894,6 @@ if (CodeGenOpts.NullPointerIsValid) FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid); - if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE()) - FuncAttrs.addAttribute("denormal-fp-math", - CodeGenOpts.FPDenormalMode.str()); - if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) { - FuncAttrs.addAttribute( - "denormal-fp-math-f32", - CodeGenOpts.FP32DenormalMode.str()); - } - if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore) FuncAttrs.addAttribute("no-trapping-math", "true"); @@ -1983,6 +1996,19 @@ } } +void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, + bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, + FuncAttrs); + if (!AttrOnCallSite) { + // If we're just getting the default, get the default values for mergeable + // attributes. + addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs); + } +} + void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { llvm::AttrBuilder FuncAttrs(F.getContext()); getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), @@ -1991,8 +2017,60 @@ F.addFnAttrs(FuncAttrs); } +/// Apply default attributes to \p F, accounting for merge semantics of +/// attributes that should not overwrite existing attributes. +void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, bool WillInternalize) { + llvm::AttrBuilder FuncAttrs(F.getContext()); + getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(), + /*AttrOnCallSite=*/false, FuncAttrs); + GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs); + + if (!WillInternalize && F.isInterposable()) { + // Do not promote "dynamic" denormal-fp-math to this translation unit's + // setting for weak functions that won't be internalized. The user has no + // real control for how builtin bitcode is linked, so we shouldn't assume + // later copies will use a consistent mode. + F.addFnAttrs(FuncAttrs); + return; + } + + llvm::AttributeMask AttrsToRemove; + + llvm::DenormalMode DenormModeToMerge = F.getDenormalModeRaw(); + llvm::DenormalMode DenormModeToMergeF32 = F.getDenormalModeF32Raw(); + llvm::DenormalMode Merged = + CodeGenOpts.FPDenormalMode.mergeCalleeMode(DenormModeToMerge); + llvm::DenormalMode MergedF32 = CodeGenOpts.FP32DenormalMode; + + if (DenormModeToMergeF32.isValid()) { + MergedF32 = + CodeGenOpts.FP32DenormalMode.mergeCalleeMode(DenormModeToMergeF32); + } + + if (Merged == llvm::DenormalMode::getDefault()) { + AttrsToRemove.addAttribute("denormal-fp-math"); + } else if (Merged != DenormModeToMerge) { + // Overwrite existing attribute + FuncAttrs.addAttribute("denormal-fp-math", + CodeGenOpts.FPDenormalMode.str()); + } + + if (MergedF32 == llvm::DenormalMode::getDefault()) { + AttrsToRemove.addAttribute("denormal-fp-math-f32"); + } else if (MergedF32 != DenormModeToMergeF32) { + // Overwrite existing attribute + FuncAttrs.addAttribute("denormal-fp-math-f32", + CodeGenOpts.FP32DenormalMode.str()); + } + + F.removeFnAttrs(AttrsToRemove); + addDenormalModeAttrs(Merged, MergedF32, FuncAttrs); + F.addFnAttrs(FuncAttrs); +} + void CodeGenModule::addDefaultFunctionDefinitionAttributes( - llvm::AttrBuilder &attrs) { + llvm::AttrBuilder &attrs) { getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false, /*for call*/ false, attrs); GetCPUAndFeaturesAttributes(GlobalDecl(), attrs); Index: clang/lib/CodeGen/CodeGenAction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenAction.cpp +++ clang/lib/CodeGen/CodeGenAction.cpp @@ -269,7 +269,8 @@ // in LLVM IR. if (F.isIntrinsic()) continue; - Gen->CGM().addDefaultFunctionDefinitionAttributes(F); + Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F, + LM.Internalize); } CurLinkModule = LM.Module.get(); Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1272,6 +1272,8 @@ /// function which relies on particular fast-math attributes for correctness. /// It's up to you to ensure that this is safe. void addDefaultFunctionDefinitionAttributes(llvm::Function &F); + void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, + bool WillInternalize); /// Like the overload taking a `Function &`, but intended specifically /// for frontends that want to build on Clang's target-configuration logic. @@ -1734,6 +1736,12 @@ /// function. void SimplifyPersonality(); + /// Helper function for getDefaultFunctionAttributes. Builds a set of function + /// attributes which can be simply added to a function. + void getTrivialDefaultFunctionAttributes(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); + /// Helper function for ConstructAttributeList and /// addDefaultFunctionDefinitionAttributes. Builds a set of function /// attributes to add to a function with the given properties. Index: clang/test/CodeGen/denormalfpmode-f32.c =================================================================== --- clang/test/CodeGen/denormalfpmode-f32.c +++ clang/test/CodeGen/denormalfpmode-f32.c @@ -2,21 +2,30 @@ // RUN: %clang_cc1 -S -fdenormal-fp-math=ieee %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PS,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PZ,CHECK-F32-NONE +// RUN: %clang_cc1 -S -fdenormal-fp-math=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-DYNAMIC,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math-f32=ieee %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=ieee %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PS,CHECK-F32-IEEE // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero -fdenormal-fp-math-f32=ieee %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PZ,CHECK-F32-IEEE +// RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero -fdenormal-fp-math-f32=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PZ,CHECK-F32-DYNAMIC + // RUN: %clang_cc1 -S -fdenormal-fp-math-f32=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-PS // RUN: %clang_cc1 -S -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-PS // RUN: %clang_cc1 -S -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PS,CHECK-F32-NONE // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero -fdenormal-fp-math-f32=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PZ,CHECK-F32-PS +// RUN: %clang_cc1 -S -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-DYNAMIC + // RUN: %clang_cc1 -S -fdenormal-fp-math-f32=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-PZ +// RUN: %clang_cc1 -S -fdenormal-fp-math-f32=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-DYNAMIC // RUN: %clang_cc1 -S -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-NONE,CHECK-F32-PZ +// RUN: %clang_cc1 -S -fdenormal-fp-math=dynamic -fdenormal-fp-math-f32=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-DYNAMIC,CHECK-F32-PZ // RUN: %clang_cc1 -S -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PS,CHECK-F32-PZ // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero -fdenormal-fp-math-f32=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-PZ,CHECK-F32-NONE +// RUN: %clang_cc1 -S -fdenormal-fp-math=dynamic -fdenormal-fp-math-f32=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK-ATTR,CHECK-DYNAMIC,CHECK-F32-NONE + // CHECK-LABEL: main @@ -25,11 +34,16 @@ // CHECK-IEEE: "denormal-fp-math"="ieee,ieee" // CHECK-PS: "denormal-fp-math"="preserve-sign,preserve-sign" // CHECK-PZ: "denormal-fp-math"="positive-zero,positive-zero" +// CHECK-DYNAMIC: "denormal-fp-math"="dynamic,dynamic" + // CHECK-F32-NONE-NOT:"denormal-fp-math-f32" // CHECK-F32-IEEE: "denormal-fp-math-f32"="ieee,ieee" // CHECK-F32-PS: "denormal-fp-math-f32"="preserve-sign,preserve-sign" // CHECK-F32-PZ: "denormal-fp-math-f32"="positive-zero,positive-zero" + +// CHECK-F32-DYNAMIC: "denormal-fp-math-f32"="dynamic,dynamic" + int main(void) { return 0; } Index: clang/test/CodeGen/denormalfpmode.c =================================================================== --- clang/test/CodeGen/denormalfpmode.c +++ clang/test/CodeGen/denormalfpmode.c @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -S -fdenormal-fp-math=ieee %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-IEEE // RUN: %clang_cc1 -S -fdenormal-fp-math=preserve-sign %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PS // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ +// RUN: %clang_cc1 -S -fdenormal-fp-math=dynamic %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-DYNAMIC // CHECK-LABEL: main @@ -8,6 +9,7 @@ // CHECK-IEEE-NOT:"denormal-fp-math" // CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}} // CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}} +// CHECK-DYNAMIC: attributes #0 = {{.*}}"denormal-fp-math"="dynamic,dynamic"{{.*}} int main(void) { return 0; Index: clang/test/CodeGenCUDA/Inputs/ocml-sample.cl =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/Inputs/ocml-sample.cl @@ -0,0 +1,18 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +half do_f16_stuff(half a, half b, half c) { + return __builtin_fmaf16(a, b, c) + 4.0h; +} + +float do_f32_stuff(float a, float b, float c) { + return __builtin_fmaf(a, b, c) + 4.0f; +} + +double do_f64_stuff(double a, double b, double c) { + return __builtin_fma(a, b, c) + 4.0; +} + +__attribute__((weak)) +float weak_do_f32_stuff(float a, float b, float c) { + return c * (a / b); +} Index: clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/link-builtin-bitcode-denormal-fp-mode.cu @@ -0,0 +1,157 @@ +// Verify the behavior of the denormal-fp-mode attributes in the way that +// rocm-device-libs should be built with. The bitcode should be compiled with +// denormal-fp-math-f32=dynamic, and should be replaced with the denormal mode +// of the final TU. + +// Build the fake device library in the way rocm-device-libs should be built. +// +// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math-f32=dynamic \ +// RUN: -mcode-object-version=none -emit-llvm-bc \ +// RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.f32.bc +// +// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math=dynamic \ +// RUN: -mcode-object-version=none -emit-llvm-bc \ +// RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.full.bc + + + +// Check the default behavior with no denormal-fp-math arguments. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \ +// RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE + + +// Check an explicit full ieee request +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \ +// RUN: -fdenormal-fp-math=ieee \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \ +// RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE + + +// Check explicit f32-only flushing request +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32 + + +// Check explicit flush all request. Only the f32 component of the library is +// dynamic, so the linked functions should use IEEE as the base mode and the new +// functions preserve-sign. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,PSZ + + +// Check explicit f32-only, ieee-other flushing request +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=preserve-sign \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32 + + +// Check inverse of normal usage. Requesting IEEE f32, with flushed f16/f64 +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ +// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNF32 + + +// Check backwards from the normal usage where both library components can be +// overridden. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ +// RUN: -mlink-builtin-bitcode %t.dynamic.full.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNFULL + + + +// Check the case where no internalization is performed +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ +// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ +// RUN: -mlink-bitcode-file %t.dynamic.full.bc -emit-llvm %s -o - \ +// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,NOINTERNALIZE,NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL + + + +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) + +typedef _Float16 half; + +extern "C" { +__device__ half do_f16_stuff(half a, half b, half c); +__device__ float do_f32_stuff(float a, float b, float c); + +// Currently all library functions are internalized. Check a weak function in +// case we ever choose to not internalize these. In that case, the safest thing +// to do would likely be to preserve the dynamic denormal-fp-math. +__attribute__((weak)) __device__ float weak_do_f32_stuff(float a, float b, float c); +__device__ double do_f64_stuff(double a, double b, double c); + + + // CHECK: kernel_f16({{.*}}) #[[$KERNELATTR:[0-9]+]] +__global__ void kernel_f16(float* out, float* a, float* b, float* c) { + int id = 0; + out[id] = do_f16_stuff(a[id], b[id], c[id]); +} + +// CHECK: kernel_f32({{.*}}) #[[$KERNELATTR]] +__global__ void kernel_f32(float* out, float* a, float* b, float* c) { + int id = 0; + out[id] = do_f32_stuff(a[id], b[id], c[id]); + out[id] += weak_do_f32_stuff(a[id], b[id], c[id]); +} + +// CHECK: kernel_f64({{.*}}) #[[$KERNELATTR]] +__global__ void kernel_f64(double* out, double* a, double* b, double* c) { + int id = 0; + out[id] = do_f64_stuff(a[id], b[id], c[id]); +} +} + +// INTERNALIZE: define internal half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]] +// INTERNALIZE: define internal float @do_f32_stuff({{.*}}) #[[$FUNCATTR]] +// INTERNALIZE: define internal double @do_f64_stuff({{.*}}) #[[$FUNCATTR]] +// INTERNALIZE: define internal float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]] + + +// NOINTERNALIZE: define dso_local half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]] +// NOINTERNALIZE: define dso_local float @do_f32_stuff({{.*}}) #[[$FUNCATTR]] +// NOINTERNALIZE: define dso_local double @do_f64_stuff({{.*}}) #[[$FUNCATTR]] +// NOINTERNALIZE: define weak float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]] + + + +// We should not be littering call sites with the attribute +// Everything should use the default ieee with no explicit attribute + +// FIXME: Should check-not "denormal-fp-math" within the denormal-fp-math-f32 +// lines. + +// Default mode relies on the implicit check-not for the denormal-fp-math. + +// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } +// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } +// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } + +// FIXME: Should check-not "denormal-fp-math" within the line +// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } +// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } +// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" {{.*}} "target-cpu"="gfx803" {{.*}} } + +// IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } +// implicit check-not +// implicit check-not + + +// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } +// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } +// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } + + +// -mlink-bitcode-file doesn't internalize or propagate attributes. +// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } +// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} } +// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} } Index: clang/test/Driver/denormal-fp-math.c =================================================================== --- clang/test/Driver/denormal-fp-math.c +++ clang/test/Driver/denormal-fp-math.c @@ -1,6 +1,9 @@ // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=preserve-sign -v 2>&1 | FileCheck -check-prefix=CHECK-PS %s // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=positive-zero -v 2>&1 | FileCheck -check-prefix=CHECK-PZ %s + +// RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=dynamic -v 2>&1 | FileCheck -check-prefix=CHECK-DYNAMIC %s + // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-fast-math -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s // RUN: %clang -### -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=ieee -fno-unsafe-math-optimizations -v 2>&1 | FileCheck -check-prefix=CHECK-NO-UNSAFE %s // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID0 %s @@ -12,6 +15,9 @@ // CHECK-IEEE-NOT: -fdenormal-fp-math= // CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign" // CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero" +// CHECK-DYNAMIC: "-fdenormal-fp-math=dynamic,dynamic" + + // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee" // CHECK-INVALID0: error: invalid value 'foo' in '-fdenormal-fp-math=foo' // CHECK-INVALID1: error: invalid value 'ieee,foo' in '-fdenormal-fp-math=ieee,foo' Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -2148,31 +2148,36 @@ This indicates the denormal (subnormal) handling that may be assumed for the default floating-point environment. This is a comma separated pair. The elements may be one of ``"ieee"``, - ``"preserve-sign"``, or ``"positive-zero"``. The first entry - indicates the flushing mode for the result of floating point - operations. The second indicates the handling of denormal inputs + ``"preserve-sign"``, ``"positive-zero"``, or ``"dynamic"``. The + first entry indicates the flushing mode for the result of floating + point operations. The second indicates the handling of denormal inputs to floating point instructions. For compatibility with older bitcode, if the second value is omitted, both input and output modes will assume the same mode. - If this is attribute is not specified, the default is - ``"ieee,ieee"``. + If this is attribute is not specified, the default is ``"ieee,ieee"``. If the output mode is ``"preserve-sign"``, or ``"positive-zero"``, denormal outputs may be flushed to zero by standard floating-point operations. It is not mandated that flushing to zero occurs, but if a denormal output is flushed to zero, it must respect the sign - mode. Not all targets support all modes. While this indicates the - expected floating point mode the function will be executed with, - this does not make any attempt to ensure the mode is - consistent. User or platform code is expected to set the floating - point mode appropriately before function entry. - - If the input mode is ``"preserve-sign"``, or ``"positive-zero"``, a - floating-point operation must treat any input denormal value as - zero. In some situations, if an instruction does not respect this - mode, the input may need to be converted to 0 as if by - ``@llvm.canonicalize`` during lowering for correctness. + mode. Not all targets support all modes. + + If the mode is ``"dynamic"``, the behavior is derived from the + dynamic state of the floating-point environment. Transformations + which depend on the behavior of denormal values should not be + performed. + + While this indicates the expected floating point mode the function + will be executed with, this does not make any attempt to ensure + the mode is consistent. User or platform code is expected to set + the floating point mode appropriately before function entry. + + If the input mode is ``"preserve-sign"``, or ``"positive-zero"``, + a floating-point operation must treat any input denormal value as + zero. In some situations, if an instruction does not respect this + mode, the input may need to be converted to 0 as if by + ``@llvm.canonicalize`` during lowering for correctness. ``"denormal-fp-math-f32"`` Same as ``"denormal-fp-math"``, but only controls the behavior of Index: llvm/include/llvm/ADT/FloatingPointMode.h =================================================================== --- llvm/include/llvm/ADT/FloatingPointMode.h +++ llvm/include/llvm/ADT/FloatingPointMode.h @@ -79,7 +79,10 @@ PreserveSign, /// Denormals are flushed to positive zero. - PositiveZero + PositiveZero, + + /// Denormals have unknown treatment. + Dynamic }; /// Denormal flushing mode for floating point instruction results in the @@ -100,6 +103,11 @@ return DenormalMode(DenormalModeKind::Invalid, DenormalModeKind::Invalid); } + /// Return the assumed default mode for a function without denormal-fp-math. + static constexpr DenormalMode getDefault() { + return getIEEE(); + } + static constexpr DenormalMode getIEEE() { return DenormalMode(DenormalModeKind::IEEE, DenormalModeKind::IEEE); } @@ -114,6 +122,10 @@ DenormalModeKind::PositiveZero); } + static constexpr DenormalMode getDynamic() { + return DenormalMode(DenormalModeKind::Dynamic, DenormalModeKind::Dynamic); + } + bool operator==(DenormalMode Other) const { return Output == Other.Output && Input == Other.Input; } @@ -131,6 +143,18 @@ Input != DenormalModeKind::Invalid; } + /// Get the effective denormal mode if the mode if this caller calls into a + /// function with \p Callee. This promotes dynamic modes to the mode of the + /// caller. + DenormalMode mergeCalleeMode(DenormalMode Callee) const { + DenormalMode MergedMode = Callee; + if (Callee.Input == DenormalMode::Dynamic) + MergedMode.Input = Input; + if (Callee.Output == DenormalMode::Dynamic) + MergedMode.Output = Output; + return MergedMode; + } + inline void print(raw_ostream &OS) const; inline std::string str() const { @@ -151,10 +175,11 @@ parseDenormalFPAttributeComponent(StringRef Str) { // Assume ieee on unspecified attribute. return StringSwitch(Str) - .Cases("", "ieee", DenormalMode::IEEE) - .Case("preserve-sign", DenormalMode::PreserveSign) - .Case("positive-zero", DenormalMode::PositiveZero) - .Default(DenormalMode::Invalid); + .Cases("", "ieee", DenormalMode::IEEE) + .Case("preserve-sign", DenormalMode::PreserveSign) + .Case("positive-zero", DenormalMode::PositiveZero) + .Case("dynamic", DenormalMode::Dynamic) + .Default(DenormalMode::Invalid); } /// Return the name used for the denormal handling mode used by the the @@ -167,6 +192,8 @@ return "preserve-sign"; case DenormalMode::PositiveZero: return "positive-zero"; + case DenormalMode::Dynamic: + return "dynamic"; default: return ""; } Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -41,6 +41,9 @@ /// StringBool attribute. class StrBoolAttr : Attr; +/// Arbitrary string attribute. +class ComplexStrAttr P> : Attr; + /// Target-independent enum attributes. /// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias. @@ -318,6 +321,9 @@ def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">; def UseSampleProfile : StrBoolAttr<"use-sample-profile">; +def DenormalFPMath : ComplexStrAttr<"denormal-fp-math", [FnAttr]>; +def DenormalFPMathF32 : ComplexStrAttr<"denormal-fp-math-f32", [FnAttr]>; + class CompatRule { // The name of the function called to check the attribute of the caller and // callee and decide whether inlining should be allowed. The function's @@ -337,6 +343,8 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"checkDenormMode">; + class MergeRule { // The name of the function called to merge the attributes of the caller and Index: llvm/include/llvm/IR/Function.h =================================================================== --- llvm/include/llvm/IR/Function.h +++ llvm/include/llvm/IR/Function.h @@ -649,6 +649,15 @@ /// function. DenormalMode getDenormalMode(const fltSemantics &FPType) const; + /// Return the representational value of "denormal-fp-math". Code interested + /// in the semantics of the function should use getDenormalMode instead. + DenormalMode getDenormalModeRaw() const; + + /// Return the representational value of "denormal-fp-math-f32". Code + /// interested in the semantics of the function should use getDenormalMode + /// instead. + DenormalMode getDenormalModeF32Raw() const; + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a Function) from the Function Src to this one. void copyAttributesFrom(const Function *Src); Index: llvm/lib/Analysis/ConstantFolding.cpp =================================================================== --- llvm/lib/Analysis/ConstantFolding.cpp +++ llvm/lib/Analysis/ConstantFolding.cpp @@ -1323,7 +1323,11 @@ // Flush any denormal constant float input according to denormal handling // mode. Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false); + if (!Ops0) + return nullptr; Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false); + if (!Ops1) + return nullptr; return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } @@ -1358,6 +1362,10 @@ return Operand; const APFloat &APF = CFP->getValueAPF(); + // TODO: Should this canonicalize nans? + if (!APF.isDenormal()) + return Operand; + Type *Ty = CFP->getType(); DenormalMode DenormMode = I->getFunction()->getDenormalMode(Ty->getFltSemantics()); @@ -1366,7 +1374,8 @@ switch (Mode) { default: llvm_unreachable("unknown denormal mode"); - return Operand; + case DenormalMode::Dynamic: + return nullptr; case DenormalMode::IEEE: return Operand; case DenormalMode::PreserveSign: @@ -1392,7 +1401,11 @@ if (Instruction::isBinaryOp(Opcode)) { // Flush denormal inputs if needed. Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false); + if (!Op0) + return nullptr; Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false); + if (!Op1) + return nullptr; // Calculate constant result. Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL); @@ -1966,13 +1979,26 @@ if (Src.isDenormal() && CI->getParent() && CI->getFunction()) { DenormalMode DenormMode = CI->getFunction()->getDenormalMode(Src.getSemantics()); + + // TODO: Should allow folding for pure IEEE. if (DenormMode == DenormalMode::getIEEE()) return nullptr; + if (DenormMode == DenormalMode::getDynamic()) + return nullptr; + + // If we know if either input or output is flushed, we can fold. + if ((DenormMode.Input == DenormalMode::Dynamic && + DenormMode.Output == DenormalMode::IEEE) || + (DenormMode.Input == DenormalMode::IEEE && + DenormMode.Output == DenormalMode::Dynamic)) + return nullptr; + bool IsPositive = (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero || (DenormMode.Output == DenormalMode::PositiveZero && DenormMode.Input == DenormalMode::IEEE)); + return ConstantFP::get(CI->getContext(), APFloat::getZero(Src.getSemantics(), !IsPositive)); } Index: llvm/lib/CodeGen/CommandFlags.cpp =================================================================== --- llvm/lib/CodeGen/CommandFlags.cpp +++ llvm/lib/CodeGen/CommandFlags.cpp @@ -241,14 +241,15 @@ cl::init(false)); CGBINDOPT(EnableNoTrappingFPMath); - static const auto DenormFlagEnumOptions = - cl::values(clEnumValN(DenormalMode::IEEE, "ieee", - "IEEE 754 denormal numbers"), - clEnumValN(DenormalMode::PreserveSign, "preserve-sign", - "the sign of a flushed-to-zero number is preserved " - "in the sign of 0"), - clEnumValN(DenormalMode::PositiveZero, "positive-zero", - "denormals are flushed to positive zero")); + static const auto DenormFlagEnumOptions = cl::values( + clEnumValN(DenormalMode::IEEE, "ieee", "IEEE 754 denormal numbers"), + clEnumValN(DenormalMode::PreserveSign, "preserve-sign", + "the sign of a flushed-to-zero number is preserved " + "in the sign of 0"), + clEnumValN(DenormalMode::PositiveZero, "positive-zero", + "denormals are flushed to positive zero"), + clEnumValN(DenormalMode::Dynamic, "dynamic", + "denormals have unknown treatment")); // FIXME: Doesn't have way to specify separate input and output modes. static cl::opt DenormalFPMath( Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6747,20 +6747,23 @@ EVT VT = Op.getValueType(); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + + // This is specifically a check for the handling of denormal inputs, not the + // result. + if (Mode.Input == DenormalMode::PreserveSign || + Mode.Input == DenormalMode::PositiveZero) { + // Test = X == 0.0 + return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + } + // Testing it with denormal inputs to avoid wrong estimate. - if (Mode.Input == DenormalMode::IEEE) { - // This is specifically a check for the handling of denormal inputs, - // not the result. - - // Test = fabs(X) < SmallestNormal - const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); - APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); - SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); - SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); - return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); - } - // Test = X == 0.0 - return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + // + // Test = fabs(X) < SmallestNormal + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); } SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, Index: llvm/lib/IR/Attributes.cpp =================================================================== --- llvm/lib/IR/Attributes.cpp +++ llvm/lib/IR/Attributes.cpp @@ -1943,6 +1943,37 @@ return AM; } +/// Callees with dynamic denormal modes are compatible with any caller mode. +static bool denormModeCompatible(DenormalMode CallerMode, + DenormalMode CalleeMode) { + if (CallerMode == CalleeMode || CalleeMode == DenormalMode::getDynamic()) + return true; + + // If they don't exactly match, it's OK if the mismatched component is + // dynamic. + if (CalleeMode.Input == CallerMode.Input && + CalleeMode.Output == DenormalMode::Dynamic) + return true; + + if (CalleeMode.Output == CallerMode.Output && + CalleeMode.Input == DenormalMode::Dynamic) + return true; + return false; +} + +static bool checkDenormMode(const Function &Caller, const Function &Callee) { + DenormalMode CallerMode = Caller.getDenormalModeRaw(); + DenormalMode CalleeMode = Callee.getDenormalModeRaw(); + + if (denormModeCompatible(CallerMode, CalleeMode)) { + DenormalMode CallerModeF32 = Caller.getDenormalModeF32Raw(); + DenormalMode CalleeModeF32 = Callee.getDenormalModeF32Raw(); + return denormModeCompatible(CallerModeF32, CalleeModeF32); + } + + return false; +} + template static bool isEqual(const Function &Caller, const Function &Callee) { return Caller.getFnAttribute(AttrClass::getKind()) == Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -698,17 +698,30 @@ DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const { if (&FPType == &APFloat::IEEEsingle()) { - Attribute Attr = getFnAttribute("denormal-fp-math-f32"); - StringRef Val = Attr.getValueAsString(); - if (!Val.empty()) - return parseDenormalFPAttribute(Val); - + DenormalMode Mode = getDenormalModeF32Raw(); // If the f32 variant of the attribute isn't specified, try to use the // generic one. + if (Mode.isValid()) + return Mode; } + return getDenormalModeRaw(); +} + +DenormalMode Function::getDenormalModeRaw() const { Attribute Attr = getFnAttribute("denormal-fp-math"); - return parseDenormalFPAttribute(Attr.getValueAsString()); + StringRef Val = Attr.getValueAsString(); + return parseDenormalFPAttribute(Val); +} + +DenormalMode Function::getDenormalModeF32Raw() const { + Attribute Attr = getFnAttribute("denormal-fp-math-f32"); + if (Attr.isValid()) { + StringRef Val = Attr.getValueAsString(); + return parseDenormalFPAttribute(Val); + } + + return DenormalMode::getInvalid(); } const std::string &Function::getGC() const { Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1369,35 +1369,10 @@ return FP_DENORM_FLUSH_NONE; } - /// Returns true if a flag is compatible if it's enabled in the callee, but - /// disabled in the caller. - static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { - return CallerMode == CalleeMode || (!CallerMode && CalleeMode); - } - // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should // be able to override. bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { - if (DX10Clamp != CalleeMode.DX10Clamp) - return false; - if (IEEE != CalleeMode.IEEE) - return false; - - // Allow inlining denormals enabled into denormals flushed functions. - return oneWayCompatible(FP64FP16Denormals.Input != - DenormalMode::PreserveSign, - CalleeMode.FP64FP16Denormals.Input != - DenormalMode::PreserveSign) && - oneWayCompatible(FP64FP16Denormals.Output != - DenormalMode::PreserveSign, - CalleeMode.FP64FP16Denormals.Output != - DenormalMode::PreserveSign) && - oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign, - CalleeMode.FP32Denormals.Input != - DenormalMode::PreserveSign) && - oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign, - CalleeMode.FP32Denormals.Output != - DenormalMode::PreserveSign); + return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE; } }; Index: llvm/test/CodeGen/Generic/denormal-fp-math-cl-opt.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/denormal-fp-math-cl-opt.ll @@ -0,0 +1,9 @@ +; RUN: llc -denormal-fp-math=dynamic --denormal-fp-math-f32=preserve-sign -stop-after=finalize-isel < %s | FileCheck %s + +; Check that the command line flag annotates the IR with the +; appropriate attributes. + +; CHECK: attributes #0 = { "denormal-fp-math"="dynamic,dynamic" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } +define float @foo(float %var) { + ret float %var +} Index: llvm/test/CodeGen/X86/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -183,8 +183,8 @@ ret <4 x float> %call } -define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 { -; SSE-LABEL: sqrt_v4f32_check_denorms_ninf: +define <4 x float> @sqrt_v4f32_check_denorms_ieee_ninf(<4 x float> %x) #3 { +; SSE-LABEL: sqrt_v4f32_check_denorms_ieee_ninf: ; SSE: # %bb.0: ; SSE-NEXT: rsqrtps %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -201,7 +201,7 @@ ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: sqrt_v4f32_check_denorms_ninf: +; AVX1-LABEL: sqrt_v4f32_check_denorms_ieee_ninf: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtps %xmm0, %xmm1 ; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm2 @@ -215,7 +215,58 @@ ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: sqrt_v4f32_check_denorms_ninf: +; AVX512-LABEL: sqrt_v4f32_check_denorms_ieee_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vrsqrtps %xmm0, %xmm1 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] +; AVX512-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; AVX512-NEXT: vmulps %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vmulps %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2 + ret <4 x float> %call +} + +define <4 x float> @sqrt_v4f32_check_denorms_dynamic_ninf(<4 x float> %x) #6 { +; SSE-LABEL: sqrt_v4f32_check_denorms_dynamic_ninf: +; SSE: # %bb.0: +; SSE-NEXT: rsqrtps %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: mulps %xmm1, %xmm2 +; SSE-NEXT: movaps {{.*#+}} xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; SSE-NEXT: mulps %xmm2, %xmm3 +; SSE-NEXT: mulps %xmm1, %xmm2 +; SSE-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: mulps %xmm3, %xmm2 +; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; SSE-NEXT: cmpleps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: sqrt_v4f32_check_denorms_dynamic_ninf: +; AVX1: # %bb.0: +; AVX1-NEXT: vrsqrtps %xmm0, %xmm1 +; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX1-NEXT: vmulps %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmulps %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: sqrt_v4f32_check_denorms_dynamic_ninf: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtps %xmm0, %xmm1 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm2 @@ -971,6 +1022,7 @@ attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" } attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" } attributes #2 = { nounwind readnone } -attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" } +attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" } attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" } attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" } +attributes #6 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" } Index: llvm/test/Transforms/Inline/AMDGPU/inline-denormal-fp-math.ll =================================================================== --- llvm/test/Transforms/Inline/AMDGPU/inline-denormal-fp-math.ll +++ llvm/test/Transforms/Inline/AMDGPU/inline-denormal-fp-math.ll @@ -37,9 +37,45 @@ ret i32 4 } +define i32 @func_dynamic_dynamic() #4 { +; CHECK-LABEL: @func_dynamic_dynamic( +; CHECK-NEXT: ret i32 5 +; + ret i32 5 +} + +define i32 @func_dynamic_ieee() #5 { +; CHECK-LABEL: @func_dynamic_ieee( +; CHECK-NEXT: ret i32 6 +; + ret i32 6 +} + +define i32 @func_ieee_dynamic() #6 { +; CHECK-LABEL: @func_ieee_dynamic( +; CHECK-NEXT: ret i32 7 +; + ret i32 7 +} + +define i32 @func_psz_dynamic() #7 { +; CHECK-LABEL: @func_psz_dynamic( +; CHECK-NEXT: ret i32 8 +; + ret i32 8 +} + +define i32 @func_dynamic_psz() #8 { +; CHECK-LABEL: @func_dynamic_psz( +; CHECK-NEXT: ret i32 9 +; + ret i32 9 +} + define i32 @call_default_from_psz_psz() #1 { ; CHECK-LABEL: @call_default_from_psz_psz( -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_default() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_default() ret i32 %call @@ -55,7 +91,8 @@ define i32 @call_ieee_ieee_from_psz_psz() #1 { ; CHECK-LABEL: @call_ieee_ieee_from_psz_psz( -; CHECK-NEXT: ret i32 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_ieee_ieee() ret i32 %call @@ -80,7 +117,8 @@ define i32 @call_psz_ieee_from_psz_psz() #1 { ; CHECK-LABEL: @call_psz_ieee_from_psz_psz( -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_psz_ieee() ret i32 %call @@ -88,7 +126,8 @@ define i32 @call_ieee_psz_from_psz_psz() #1 { ; CHECK-LABEL: @call_ieee_psz_from_psz_psz( -; CHECK-NEXT: ret i32 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_ieee_psz() ret i32 %call @@ -114,7 +153,8 @@ define i32 @call_ieee_ieee_from_psz_ieee() #2 { ; CHECK-LABEL: @call_ieee_ieee_from_psz_ieee( -; CHECK-NEXT: ret i32 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_ieee_ieee() ret i32 %call @@ -148,7 +188,8 @@ define i32 @call_ieee_ieee_from_ieee_psz() #3 { ; CHECK-LABEL: @call_ieee_ieee_from_ieee_psz( -; CHECK-NEXT: ret i32 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] ; %call = call i32 @func_ieee_ieee() ret i32 %call @@ -180,7 +221,614 @@ ret i32 %call } +define i32 @call_dynamic_dynamic_from_ieee_ieee() #0 { +; CHECK-LABEL: @call_dynamic_dynamic_from_ieee_ieee( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_ieee_ieee() #0 { +; CHECK-LABEL: @call_dynamic_ieee_from_ieee_ieee( +; CHECK-NEXT: ret i32 6 +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_ieee_ieee() #0 { +; CHECK-LABEL: @call_ieee_dynamic_from_ieee_ieee( +; CHECK-NEXT: ret i32 7 +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_ieee_ieee() #0 { +; CHECK-LABEL: @call_dynamic_psz_from_ieee_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_ieee_ieee() #0 { +; CHECK-LABEL: @call_psz_dynamic_from_ieee_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_psz_psz() #1 { +; CHECK-LABEL: @call_dynamic_dynamic_from_psz_psz( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_psz_psz() #1 { +; CHECK-LABEL: @call_ieee_dynamic_from_psz_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_psz_psz() #1 { +; CHECK-LABEL: @call_dynamic_ieee_from_psz_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_psz_psz() #1 { +; CHECK-LABEL: @call_psz_dynamic_from_psz_psz( +; CHECK-NEXT: ret i32 8 +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_psz_psz() #1 { +; CHECK-LABEL: @call_dynamic_psz_from_psz_psz( +; CHECK-NEXT: ret i32 9 +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_psz_ieee() #2 { +; CHECK-LABEL: @call_dynamic_dynamic_from_psz_ieee( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_psz_ieee() #2 { +; CHECK-LABEL: @call_dynamic_ieee_from_psz_ieee( +; CHECK-NEXT: ret i32 6 +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_psz_ieee() #2 { +; CHECK-LABEL: @call_ieee_dynamic_from_psz_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_psz_ieee() #2 { +; CHECK-LABEL: @call_dynamic_psz_from_psz_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_psz_ieee() #2 { +; CHECK-LABEL: @call_psz_dynamic_from_psz_ieee( +; CHECK-NEXT: ret i32 8 +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_ieee_psz() #3 { +; CHECK-LABEL: @call_dynamic_dynamic_from_ieee_psz( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_ieee_psz() #3 { +; CHECK-LABEL: @call_ieee_dynamic_from_ieee_psz( +; CHECK-NEXT: ret i32 7 +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_ieee_psz() #3 { +; CHECK-LABEL: @call_dynamic_ieee_from_ieee_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_ieee_psz() #3 { +; CHECK-LABEL: @call_dynamic_psz_from_ieee_psz( +; CHECK-NEXT: ret i32 9 +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_ieee_psz() #3 { +; CHECK-LABEL: @call_psz_dynamic_from_ieee_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_dynamic_dynamic_from_dynamic_dynamic( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_ieee_ieee_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_ieee_ieee_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_ieee_dynamic_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_dynamic_ieee_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_psz_dynamic_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_dynamic_psz_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_psz_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_psz_psz_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_psz() + ret i32 %call +} + +define i32 @call_psz_ieee_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_psz_ieee_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_ieee() + ret i32 %call +} + +define i32 @call_ieee_psz_from_dynamic_dynamic() #4 { +; CHECK-LABEL: @call_ieee_psz_from_dynamic_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_psz() + ret i32 %call +} + +define i32 @call_ieee_ieee_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_ieee_ieee_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_ieee_dynamic_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_dynamic_ieee_from_dynamic_ieee( +; CHECK-NEXT: ret i32 6 +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_psz_dynamic_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_dynamic_psz_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_psz_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_psz_psz_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_psz() + ret i32 %call +} + +define i32 @call_psz_ieee_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_psz_ieee_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_ieee() + ret i32 %call +} + +define i32 @call_ieee_psz_from_dynamic_ieee() #5 { +; CHECK-LABEL: @call_ieee_psz_from_dynamic_ieee( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_psz() + ret i32 %call +} + +define i32 @call_ieee_ieee_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_ieee_ieee_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_ieee_dynamic_from_ieee_dynamic( +; CHECK-NEXT: ret i32 7 +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_dynamic_ieee_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_dynamic_dynamic_from_ieee_dynamic( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_dynamic_psz_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_psz_dynamic_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_psz_psz_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_psz_psz_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_psz() + ret i32 %call +} + +define i32 @call_psz_ieee_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_psz_ieee_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_ieee() + ret i32 %call +} + +define i32 @call_ieee_psz_from_ieee_dynamic() #6 { +; CHECK-LABEL: @call_ieee_psz_from_ieee_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_psz() + ret i32 %call +} + +define i32 @call_ieee_ieee_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_ieee_ieee_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_ieee_dynamic_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_dynamic_ieee_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_dynamic_psz_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_psz_dynamic_from_psz_dynamic( +; CHECK-NEXT: ret i32 8 +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_dynamic_dynamic_from_psz_dynamic( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_psz_ieee_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_psz_ieee_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_ieee() + ret i32 %call +} + +define i32 @call_ieee_psz_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_ieee_psz_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_psz() + ret i32 %call +} + +define i32 @call_psz_psz_from_psz_dynamic() #7 { +; CHECK-LABEL: @call_psz_psz_from_psz_dynamic( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_psz() + ret i32 %call +} + +define i32 @call_ieee_ieee_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_ieee_ieee_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_ieee() + ret i32 %call +} + +define i32 @call_ieee_psz_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_ieee_psz_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_psz() + ret i32 %call +} + +define i32 @call_psz_ieee_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_psz_ieee_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_ieee() + ret i32 %call +} + +define i32 @call_dynamic_dynamic_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_dynamic_dynamic_from_dynamic_psz( +; CHECK-NEXT: ret i32 5 +; + %call = call i32 @func_dynamic_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_psz_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_dynamic_psz_from_dynamic_psz( +; CHECK-NEXT: ret i32 9 +; + %call = call i32 @func_dynamic_psz() + ret i32 %call +} + +define i32 @call_psz_dynamic_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_psz_dynamic_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_dynamic() + ret i32 %call +} + +define i32 @call_dynamic_ieee_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_dynamic_ieee_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_dynamic_ieee() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_dynamic_ieee() + ret i32 %call +} + +define i32 @call_ieee_dynamic_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_ieee_dynamic_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_ieee_dynamic() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_ieee_dynamic() + ret i32 %call +} + +define i32 @call_psz_psz_from_dynamic_psz() #8 { +; CHECK-LABEL: @call_psz_psz_from_dynamic_psz( +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func_psz_psz() +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @func_psz_psz() + ret i32 %call +} + +; -------------------------------------------------------------------- +; denormal-fp-math-f32 +; -------------------------------------------------------------------- + +define i32 @func_dynamic_dynamic_f32() #9 { +; CHECK-LABEL: @func_dynamic_dynamic_f32( +; CHECK-NEXT: ret i32 10 +; + ret i32 10 +} + +define i32 @func_psz_psz_f32() #10 { +; CHECK-LABEL: @func_psz_psz_f32( +; CHECK-NEXT: ret i32 11 +; + ret i32 11 +} + +define i32 @call_dynamic_dynamic_from_psz_psz_f32() #10 { +; CHECK-LABEL: @call_dynamic_dynamic_from_psz_psz_f32( +; CHECK-NEXT: ret i32 10 +; + %result = call i32 @func_dynamic_dynamic_f32() + ret i32 %result +} + +define i32 @call_psz_psz_from_psz_psz_f32() #10 { +; CHECK-LABEL: @call_psz_psz_from_psz_psz_f32( +; CHECK-NEXT: ret i32 10 +; + %result = call i32 @func_dynamic_dynamic_f32() + ret i32 %result +} + +define i32 @call_psz_psz_from_ieee_ieee_f32() #11 { +; CHECK-LABEL: @call_psz_psz_from_ieee_ieee_f32( +; CHECK-NEXT: [[RESULT:%.*]] = call i32 @func_psz_psz_f32() +; CHECK-NEXT: ret i32 [[RESULT]] +; + %result = call i32 @func_psz_psz_f32() + ret i32 %result +} + attributes #0 = { "denormal-fp-math"="ieee,ieee" } attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #2 = { "denormal-fp-math"="preserve-sign,ieee" } attributes #3 = { "denormal-fp-math"="ieee,preserve-sign" } +attributes #4 = { "denormal-fp-math"="dynamic,dynamic" } +attributes #5 = { "denormal-fp-math"="dynamic,ieee" } +attributes #6 = { "denormal-fp-math"="ieee,dynamic" } +attributes #7 = { "denormal-fp-math"="preserve-sign,dynamic" } +attributes #8 = { "denormal-fp-math"="dynamic,preserve-sign" } +attributes #9 = { "denormal-fp-math-f32"="dynamic,dynamic" } +attributes #10 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } +attributes #11 = { "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" } Index: llvm/test/Transforms/InstSimplify/canonicalize.ll =================================================================== --- llvm/test/Transforms/InstSimplify/canonicalize.ll +++ llvm/test/Transforms/InstSimplify/canonicalize.ll @@ -146,6 +146,100 @@ ret float %ret } +define float @canonicalize_pos_denorm_dynamic_dynamic() "denormal-fp-math"="dynamic,dynamic" { +; CHECK-LABEL: @canonicalize_pos_denorm_dynamic_dynamic( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0x380FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + ret float %ret +} + +define float @canonicalize_neg_denorm_dynamic_dynamic() "denormal-fp-math"="dynamic,dynamic" { +; CHECK-LABEL: @canonicalize_neg_denorm_dynamic_dynamic( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 -2139095041 to float)) + ret float %ret +} + +; Dynamic output - cannot flush +define float @canonicalize_pos_denorm_dynamic_output() "denormal-fp-math"="dynamic,ieee" { +; CHECK-LABEL: @canonicalize_pos_denorm_dynamic_output( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0x380FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + ret float %ret +} + +; Dynamic output - cannot flush +define float @canonicalize_neg_denorm_dynamic_output() "denormal-fp-math"="dynamic,ieee" { +; CHECK-LABEL: @canonicalize_neg_denorm_dynamic_output( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 -2139095041 to float)) + ret float %ret +} + +; Dynamic input - cannot flush +define float @canonicalize_pos_denorm_dynamic_input() "denormal-fp-math"="ieee,dynamic" { +; CHECK-LABEL: @canonicalize_pos_denorm_dynamic_input( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0x380FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + ret float %ret +} + +; Dynamic input - cannot flush +define float @canonicalize_neg_denorm_dynamic_input() "denormal-fp-math"="ieee,dynamic" { +; CHECK-LABEL: @canonicalize_neg_denorm_dynamic_input( +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.canonicalize.f32(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 -2139095041 to float)) + ret float %ret +} + +; Input is flushed, can fold +define float @canonicalize_pos_denorm_dynamic_output_preserve_sign_input() "denormal-fp-math"="dynamic,preserve-sign" { +; CHECK-LABEL: @canonicalize_pos_denorm_dynamic_output_preserve_sign_input( +; CHECK-NEXT: ret float 0.000000e+00 +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + ret float %ret +} + +; Input is flushed, can fold +define float @canonicalize_neg_denorm_dynamic_output_preserve_sign_input() "denormal-fp-math"="dynamic,preserve-sign" { +; CHECK-LABEL: @canonicalize_neg_denorm_dynamic_output_preserve_sign_input( +; CHECK-NEXT: ret float -0.000000e+00 +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 -2139095041 to float)) + ret float %ret +} + +; Output is known flushed, can fold +define float @canonicalize_pos_preserve_sign_output_denorm_dynamic_input() "denormal-fp-math"="preserve-sign,dynamic" { +; CHECK-LABEL: @canonicalize_pos_preserve_sign_output_denorm_dynamic_input( +; CHECK-NEXT: ret float 0.000000e+00 +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float)) + ret float %ret +} + +; Output is known flushed, can fold +define float @canonicalize_neg_denorm_preserve_sign_output_dynamic_input() "denormal-fp-math"="preserve-sign,dynamic" { +; CHECK-LABEL: @canonicalize_neg_denorm_preserve_sign_output_dynamic_input( +; CHECK-NEXT: ret float -0.000000e+00 +; + %ret = call float @llvm.canonicalize.f32(float bitcast (i32 -2139095041 to float)) + ret float %ret +} + define float @canonicalize_inf() { ; CHECK-LABEL: @canonicalize_inf( ; CHECK-NEXT: ret float 0x7FF0000000000000 Index: llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll =================================================================== --- llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll +++ llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll @@ -1100,6 +1100,109 @@ ret i1 %cmp } +; ============================================================================ ; +; dynamic mode tests +; ============================================================================ ; + +define float @test_float_fadd_dynamic_ieee() #9 { +; CHECK-LABEL: @test_float_fadd_dynamic_ieee( +; CHECK-NEXT: [[RESULT:%.*]] = fadd float 0xB810000000000000, 0x3800000000000000 +; CHECK-NEXT: ret float [[RESULT]] +; + %result = fadd float 0xB810000000000000, 0x3800000000000000 + ret float %result +} + +define float @test_float_fadd_ieee_dynamic() #10 { +; CHECK-LABEL: @test_float_fadd_ieee_dynamic( +; CHECK-NEXT: [[RESULT:%.*]] = fadd float 0xB810000000000000, 0x3800000000000000 +; CHECK-NEXT: ret float [[RESULT]] +; + %result = fadd float 0xB810000000000000, 0x3800000000000000 + ret float %result +} + +define float @test_float_fadd_dynamic_dynamic() #11 { +; CHECK-LABEL: @test_float_fadd_dynamic_dynamic( +; CHECK-NEXT: [[RESULT:%.*]] = fadd float 0xB810000000000000, 0x3800000000000000 +; CHECK-NEXT: ret float [[RESULT]] +; + %result = fadd float 0xB810000000000000, 0x3800000000000000 + ret float %result +} + +; Check for failed to fold on each operand +define float @test_float_fadd_dynamic_dynamic_commute() #11 { +; CHECK-LABEL: @test_float_fadd_dynamic_dynamic_commute( +; CHECK-NEXT: [[RESULT:%.*]] = fadd float 0x3800000000000000, 0xB810000000000000 +; CHECK-NEXT: ret float [[RESULT]] +; + %result = fadd float 0x3800000000000000, 0xB810000000000000 + ret float %result +} + +define i1 @fcmp_double_dynamic_ieee() #9 { +; CHECK-LABEL: @fcmp_double_dynamic_ieee( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_ieee_dynamic() #10 { +; CHECK-LABEL: @fcmp_double_ieee_dynamic( +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0x8000000000000, 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_dynamic_dynamic() #11 { +; CHECK-LABEL: @fcmp_double_dynamic_dynamic( +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0x8000000000000, 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_dynamic_dynamic_commute() #11 { +; CHECK-LABEL: @fcmp_double_dynamic_dynamic_commute( +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0.000000e+00, 0x8000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = fcmp une double 0x0, 0x0008000000000000 + ret i1 %cmp +} + +; Output doesn't matter. +define i1 @fcmp_double_dynamic_psz() #12 { +; CHECK-LABEL: @fcmp_double_dynamic_psz( +; CHECK-NEXT: ret i1 false +; + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +; Non-denormal values should fold +define float @test_float_fadd_dynamic_dynamic_normals() #11 { +; CHECK-LABEL: @test_float_fadd_dynamic_dynamic_normals( +; CHECK-NEXT: ret float 3.000000e+00 +; + %result = fadd float 1.0, 2.0 + ret float %result +} + +; Non-denormal values should fold +define i1 @fcmp_double_dynamic_dynamic_normals() #11 { +; CHECK-LABEL: @fcmp_double_dynamic_dynamic_normals( +; CHECK-NEXT: ret i1 true +; + %cmp = fcmp une double 1.0, 2.0 + ret i1 %cmp +} + attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" } attributes #1 = { nounwind "denormal-fp-math"="positive-zero,ieee" } attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,ieee" } @@ -1109,3 +1212,7 @@ attributes #6 = { nounwind "denormal-fp-math"="positive-zero,positive-zero" } attributes #7 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="positive-zero,positive-zero" } +attributes #9 = { nounwind "denormal-fp-math"="dynamic,ieee" } +attributes #10 = { nounwind "denormal-fp-math"="ieee,dynamic" } +attributes #11 = { nounwind "denormal-fp-math"="dynamic,dynamic" } +attributes #12 = { nounwind "denormal-fp-math"="dynamic,preserve-sign" } Index: llvm/unittests/ADT/FloatingPointMode.cpp =================================================================== --- llvm/unittests/ADT/FloatingPointMode.cpp +++ llvm/unittests/ADT/FloatingPointMode.cpp @@ -20,6 +20,8 @@ parseDenormalFPAttributeComponent("preserve-sign")); EXPECT_EQ(DenormalMode::PositiveZero, parseDenormalFPAttributeComponent("positive-zero")); + EXPECT_EQ(DenormalMode::Dynamic, + parseDenormalFPAttributeComponent("dynamic")); EXPECT_EQ(DenormalMode::Invalid, parseDenormalFPAttributeComponent("foo")); } @@ -27,6 +29,7 @@ EXPECT_EQ("ieee", denormalModeKindName(DenormalMode::IEEE)); EXPECT_EQ("preserve-sign", denormalModeKindName(DenormalMode::PreserveSign)); EXPECT_EQ("positive-zero", denormalModeKindName(DenormalMode::PositiveZero)); + EXPECT_EQ("dynamic", denormalModeKindName(DenormalMode::Dynamic)); EXPECT_EQ("", denormalModeKindName(DenormalMode::Invalid)); } @@ -54,6 +57,10 @@ EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero), parseDenormalFPAttribute("positive-zero,positive-zero")); + EXPECT_EQ(DenormalMode(DenormalMode::Dynamic, DenormalMode::Dynamic), + parseDenormalFPAttribute("dynamic")); + EXPECT_EQ(DenormalMode(DenormalMode::Dynamic, DenormalMode::Dynamic), + parseDenormalFPAttribute("dynamic,dynamic")); EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::PositiveZero), parseDenormalFPAttribute("ieee,positive-zero")); @@ -65,6 +72,10 @@ EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::PreserveSign), parseDenormalFPAttribute("ieee,preserve-sign")); + EXPECT_EQ(DenormalMode(DenormalMode::Dynamic, DenormalMode::PreserveSign), + parseDenormalFPAttribute("dynamic,preserve-sign")); + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::Dynamic), + parseDenormalFPAttribute("preserve-sign,dynamic")); EXPECT_EQ(DenormalMode(DenormalMode::Invalid, DenormalMode::Invalid), parseDenormalFPAttribute("foo")); @@ -102,6 +113,13 @@ EXPECT_EQ( "preserve-sign,positive-zero", DenormalMode(DenormalMode::PreserveSign, DenormalMode::PositiveZero).str()); + + EXPECT_EQ("dynamic,dynamic", + DenormalMode(DenormalMode::Dynamic, DenormalMode::Dynamic).str()); + EXPECT_EQ("ieee,dynamic", + DenormalMode(DenormalMode::IEEE, DenormalMode::Dynamic).str()); + EXPECT_EQ("dynamic,ieee", + DenormalMode(DenormalMode::Dynamic, DenormalMode::IEEE).str()); } TEST(FloatingPointModeTest, DenormalModeIsSimple) { @@ -110,6 +128,10 @@ DenormalMode::Invalid).isSimple()); EXPECT_FALSE(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PositiveZero).isSimple()); + EXPECT_FALSE(DenormalMode(DenormalMode::PreserveSign, DenormalMode::Dynamic) + .isSimple()); + EXPECT_FALSE(DenormalMode(DenormalMode::Dynamic, DenormalMode::PreserveSign) + .isSimple()); } TEST(FloatingPointModeTest, DenormalModeIsValid) { @@ -125,10 +147,76 @@ DenormalMode::getInvalid()); EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::IEEE), DenormalMode::getIEEE()); + EXPECT_EQ(DenormalMode::getIEEE(), DenormalMode::getDefault()); + EXPECT_EQ(DenormalMode(DenormalMode::Dynamic, DenormalMode::Dynamic), + DenormalMode::getDynamic()); EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::PreserveSign), DenormalMode::getPreserveSign()); EXPECT_EQ(DenormalMode(DenormalMode::PositiveZero, DenormalMode::PositiveZero), DenormalMode::getPositiveZero()); } +TEST(FloatingPointModeTest, DenormalModeMerge) { + EXPECT_EQ( + DenormalMode::getInvalid(), + DenormalMode::getInvalid().mergeCalleeMode(DenormalMode::getInvalid())); + EXPECT_EQ(DenormalMode::getIEEE(), DenormalMode::getInvalid().mergeCalleeMode( + DenormalMode::getIEEE())); + EXPECT_EQ(DenormalMode::getInvalid(), DenormalMode::getIEEE().mergeCalleeMode( + DenormalMode::getInvalid())); + + EXPECT_EQ(DenormalMode::getIEEE(), DenormalMode::getIEEE().mergeCalleeMode( + DenormalMode::getDynamic())); + EXPECT_EQ(DenormalMode::getPreserveSign(), + DenormalMode::getPreserveSign().mergeCalleeMode( + DenormalMode::getDynamic())); + EXPECT_EQ(DenormalMode::getPositiveZero(), + DenormalMode::getPositiveZero().mergeCalleeMode( + DenormalMode::getDynamic())); + EXPECT_EQ( + DenormalMode::getDynamic(), + DenormalMode::getDynamic().mergeCalleeMode(DenormalMode::getDynamic())); + + EXPECT_EQ(DenormalMode(DenormalMode::IEEE, DenormalMode::PreserveSign), + DenormalMode(DenormalMode::IEEE, DenormalMode::PreserveSign) + .mergeCalleeMode( + DenormalMode(DenormalMode::IEEE, DenormalMode::Dynamic))); + + EXPECT_EQ(DenormalMode(DenormalMode::PreserveSign, DenormalMode::IEEE), + DenormalMode(DenormalMode::PreserveSign, DenormalMode::IEEE) + .mergeCalleeMode( + DenormalMode(DenormalMode::Dynamic, DenormalMode::IEEE))); + + EXPECT_EQ( + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign), + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign) + .mergeCalleeMode( + DenormalMode(DenormalMode::Dynamic, DenormalMode::Dynamic))); + + EXPECT_EQ( + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign), + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign) + .mergeCalleeMode( + DenormalMode(DenormalMode::PositiveZero, DenormalMode::Dynamic))); + + EXPECT_EQ( + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign), + DenormalMode(DenormalMode::PositiveZero, DenormalMode::PreserveSign) + .mergeCalleeMode( + DenormalMode(DenormalMode::Dynamic, DenormalMode::PreserveSign))); + + // Test some invalid / undefined behavior cases + EXPECT_EQ( + DenormalMode::getPreserveSign(), + DenormalMode::getIEEE().mergeCalleeMode(DenormalMode::getPreserveSign())); + EXPECT_EQ( + DenormalMode::getPreserveSign(), + DenormalMode::getIEEE().mergeCalleeMode(DenormalMode::getPreserveSign())); + EXPECT_EQ( + DenormalMode::getIEEE(), + DenormalMode::getPreserveSign().mergeCalleeMode(DenormalMode::getIEEE())); + EXPECT_EQ( + DenormalMode::getIEEE(), + DenormalMode::getPreserveSign().mergeCalleeMode(DenormalMode::getIEEE())); +} } Index: llvm/utils/TableGen/Attributes.cpp =================================================================== --- llvm/utils/TableGen/Attributes.cpp +++ llvm/utils/TableGen/Attributes.cpp @@ -54,6 +54,7 @@ // Emit attribute enums in the same order llvm::Attribute::operator< expects. Emit({"EnumAttr", "TypeAttr", "IntAttr"}, "ATTRIBUTE_ENUM"); Emit({"StrBoolAttr"}, "ATTRIBUTE_STRBOOL"); + Emit({"ComplexStrAttr"}, "ATTRIBUTE_COMPLEXSTR"); OS << "#undef ATTRIBUTE_ALL\n"; OS << "#endif\n\n";