diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -190,6 +190,8 @@ /// float-to-int conversion instructions. CODEGENOPT(StrictFloatCastOverflow, 1, 1) +CODEGENOPT(UseComplexIntrinsics, 1, 0) ///< Use LLVM complex intrinsics + CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. /// Method of Objective-C dispatch to use. diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def --- a/clang/include/clang/Basic/FPOptions.def +++ b/clang/include/clang/Basic/FPOptions.def @@ -17,7 +17,8 @@ OPTION(RoundingMode, LangOptions::RoundingMode, 3, FPContractMode) OPTION(FPExceptionMode, LangOptions::FPExceptionModeKind, 2, RoundingMode) OPTION(AllowFEnvAccess, bool, 1, FPExceptionMode) -OPTION(AllowFPReassociate, bool, 1, AllowFEnvAccess) +OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, AllowFEnvAccess) +OPTION(AllowFPReassociate, bool, 1, ComplexRange) OPTION(NoHonorNaNs, bool, 1, AllowFPReassociate) OPTION(NoHonorInfs, bool, 1, NoHonorNaNs) OPTION(NoSignedZero, bool, 1, NoHonorInfs) diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -238,6 +238,18 @@ /// Possible exception handling behavior. enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm }; + /// Controls for the range rules of complex multiplication and division. + enum ComplexRangeKind { + /// Disable both NaN checks and scaling. + CX_Limited, + + /// Disable only NaN checks. + CX_NoNan, + + /// Require NaN checks and scaling. + CX_Full + }; + enum class LaxVectorConversionKind { /// Permit no implicit vector bitcasts. 
None, @@ -485,7 +497,7 @@ class FPOptions { public: // We start by defining the layout. - using storage_type = uint16_t; + using storage_type = uint32_t; using RoundingMode = llvm::RoundingMode; @@ -529,6 +541,7 @@ setRoundingMode(LO.getFPRoundingMode()); setFPExceptionMode(LO.getFPExceptionMode()); setAllowFPReassociate(LO.AllowFPReassoc); + setComplexRange(LO.getComplexRange()); setNoHonorNaNs(LO.NoHonorNaNs); setNoHonorInfs(LO.NoHonorInfs); setNoSignedZero(LO.NoSignedZero); @@ -611,7 +624,7 @@ /// The type suitable for storing values of FPOptionsOverride. Must be twice /// as wide as bit size of FPOption. - using storage_type = uint32_t; + using storage_type = uint64_t; static_assert(sizeof(storage_type) >= 2 * sizeof(FPOptions::storage_type), "Too short type for FPOptionsOverride"); diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -207,6 +207,8 @@ BENIGN_LANGOPT(NoSignedZero , 1, 0, "Permit Floating Point optimization without regard to signed zeros") BENIGN_LANGOPT(AllowRecip , 1, 0, "Permit Floating Point reciprocal") BENIGN_LANGOPT(ApproxFunc , 1, 0, "Permit Floating Point approximation") +BENIGN_ENUM_LANGOPT(ComplexRange, ComplexRangeKind, 2, CX_Full, + "Range rules for complex multiplications and divisions") BENIGN_LANGOPT(ObjCGCBitmapPrint , 1, 0, "printing of GC's bitmap layout for __weak/__strong ivars") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1841,6 +1841,13 @@ "floating-point expressions are evaluated">, NegFlag>; +defm use_complex_intrinsics : BoolFOption<"use-complex-intrinsics", + CodeGenOpts<"UseComplexIntrinsics">, DefaultFalse, + PosFlag, + NegFlag>; + def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; 
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -88,6 +88,13 @@ ComplexPairTy EmitScalarToComplexCast(llvm::Value *Val, QualType SrcType, QualType DestType, SourceLocation Loc); + /// Unpack an LLVM complex value into its (real, imaginary) element pair. + ComplexPairTy BreakLLVMComplexType(llvm::Value *Val) { + llvm::Value *Real = Builder.CreateExtractElement(Val, uint64_t(0)); + llvm::Value *Imag = Builder.CreateExtractElement(Val, uint64_t(1)); + return ComplexPairTy(Real, Imag); + } + //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// @@ -251,6 +258,7 @@ ComplexPairTy LHS; ComplexPairTy RHS; QualType Ty; // Computation Type. + FPOptions FPFeatures; // FP options in effect for the operator. }; BinOpInfo EmitBinOps(const BinaryOperator *E); @@ -701,6 +709,14 @@ // still more of this within the type system. if (Op.LHS.second && Op.RHS.second) { + if (CGF.CGM.getCodeGenOpts().UseComplexIntrinsics) { + Value *Op0 = Builder.CreateComplexValue(Op.LHS.first, Op.LHS.second); + Value *Op1 = Builder.CreateComplexValue(Op.RHS.first, Op.RHS.second); + Value *Result = Builder.CreateComplexMul(Op0, Op1, + Op.FPFeatures.getComplexRange() != LangOptions::CX_Full); + return BreakLLVMComplexType(Result); + } + // If both operands are complex, emit the core math directly, and then // test for NaNs. If we find NaNs in the result, we delegate to a libcall // to carefully re-compute the correct infinity representation if @@ -794,6 +810,22 @@ llvm::Value *DSTr, *DSTi; if (LHSr->getType()->isFloatingPointTy()) { + // If we are using complex intrinsics, do so whenever the right-hand side + // is complex, since no major simplification is possible in this scenario. + // (Simplifications are possible if the LHS is real or pure imaginary). + // NOTE(review): Op.LHS.second is presumably null when the LHS is real; + // confirm that CreateComplexValue accepts a null imaginary operand. 
+ if (CGF.CGM.getCodeGenOpts().UseComplexIntrinsics && RHSi) { + llvm::Value *Op0 = + Builder.CreateComplexValue(Op.LHS.first, Op.LHS.second); + llvm::Value *Op1 = + Builder.CreateComplexValue(Op.RHS.first, Op.RHS.second); + llvm::Value *Result = Builder.CreateComplexDiv(Op0, Op1, + Op.FPFeatures.getComplexRange() != LangOptions::CX_Full, + Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited); + return BreakLLVMComplexType(Result); + } + // If we have a complex operand on the RHS and FastMath is not allowed, we // delegate to a libcall to handle all of the complexities and minimize // underflow/overflow cases. When FastMath is allowed we construct the @@ -891,6 +923,7 @@ Ops.RHS = Visit(E->getRHS()); Ops.Ty = E->getType(); + Ops.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); return Ops; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3085,6 +3085,24 @@ if (Args.hasFlag(options::OPT_fno_strict_float_cast_overflow, options::OPT_fstrict_float_cast_overflow, false)) CmdArgs.push_back("-fno-strict-float-cast-overflow"); + + // For now, only enable complex intrinsics by default if we know we can lower + // the call correctly in ExpandComplex. 
+ bool DefaultUseComplexIntrinsics = false; + switch (TC.getArch()) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + DefaultUseComplexIntrinsics = true; + break; + default: + DefaultUseComplexIntrinsics = false; + break; + } + if (Args.hasFlag(options::OPT_fuse_complex_intrinsics, + options::OPT_fno_use_complex_intrinsics, + DefaultUseComplexIntrinsics)) { + CmdArgs.push_back("-fuse-complex-intrinsics"); + } } static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/clang/test/CodeGen/complex-intrinsics.c b/clang/test/CodeGen/complex-intrinsics.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/complex-intrinsics.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple spir -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -fuse-complex-intrinsics -o - | FileCheck %s --check-prefix=INTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-pc-win64 -fuse-complex-intrinsics -o - | FileCheck %s --check-prefix=INTRIN +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -fuse-complex-intrinsics -DT=int -o - | FileCheck %s --check-prefix=INT +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -DT=int -o - | FileCheck %s --check-prefix=INT + +// Check the driver's per-target defaults and explicit flag overrides: +// RUN: %clang %s -O0 -S -emit-llvm -target x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=INTRIN +// RUN: %clang %s -O0 -S -emit-llvm -target x86_64-unknown-unknown -fuse-complex-intrinsics -o - | FileCheck %s --check-prefix=INTRIN +// RUN: %clang %s -O0 -S -emit-llvm -target x86_64-unknown-unknown 
-fno-use-complex-intrinsics -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang %s -O0 -S -emit-llvm -target aarch64-unknown-unknown -o - | FileCheck %s --check-prefix=NOINTRIN +// RUN: %clang %s -O0 -S -emit-llvm -target aarch64-unknown-unknown -fuse-complex-intrinsics -o - | FileCheck %s --check-prefix=INTRIN +// RUN: %clang %s -O0 -S -emit-llvm -target aarch64-unknown-unknown -fno-use-complex-intrinsics -o - | FileCheck %s --check-prefix=NOINTRIN + +#ifndef T +# define T float +#endif + +T check_var; +// INTRIN: @check_var = {{.*}}global [[T:[a-z0-9]+]] +// NOINTRIN: @check_var = {{.*}}global [[T:[a-z0-9]+]] +// INT: @check_var = {{.*}}global [[T:i[0-9]+]] + +T _Complex add_rc(T a, T _Complex b) { + // INTRIN-LABEL: @add_rc( + // INTRIN-COUNT-1: fadd [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @add_rc( + // NOINTRIN-COUNT-1: fadd [[T]] + // NOINTRIN: ret + // INT-LABEL: @add_rc( + // INT-COUNT-1: add [[T]] + // INT: ret + return a + b; +} + +T _Complex add_cr(T _Complex a, T b) { + // INTRIN-LABEL: @add_cr( + // INTRIN-COUNT-1: fadd [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @add_cr( + // NOINTRIN-COUNT-1: fadd [[T]] + // NOINTRIN: ret + // INT-LABEL: @add_cr( + // INT-COUNT-1: add [[T]] + // INT: ret + return a + b; +} + +T _Complex add_cc(T _Complex a, T _Complex b) { + // INTRIN-LABEL: @add_cc( + // INTRIN-COUNT-2: fadd [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @add_cc( + // NOINTRIN-COUNT-2: fadd [[T]] + // NOINTRIN: ret + // INT-LABEL: @add_cc( + // INT-COUNT-2: add [[T]] + // INT: ret + return a + b; +} + +T _Complex sub_rc(T a, T _Complex b) { + // INTRIN-LABEL: @sub_rc( + // INTRIN: fsub [[T]] + // INTRIN: fneg [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @sub_rc( + // NOINTRIN: fsub [[T]] + // NOINTRIN: fneg [[T]] + // NOINTRIN: ret + // INT-LABEL: @sub_rc( + // INT-COUNT-2: sub [[T]] + // INT: ret + return a - b; +} + +T _Complex sub_cr(T _Complex a, T b) { + // INTRIN-LABEL: @sub_cr( + // INTRIN: fsub [[T]] + // INTRIN-NOT: fsub [[T]] + // 
INTRIN: ret + // NOINTRIN-LABEL: @sub_cr( + // NOINTRIN: fsub [[T]] + // NOINTRIN-NOT: fsub [[T]] + // NOINTRIN: ret + // INT-LABEL: @sub_cr( + // INT-COUNT-2: sub [[T]] + // INT: ret + return a - b; +} + +T _Complex sub_cc(T _Complex a, T _Complex b) { + // INTRIN-LABEL: @sub_cc( + // INTRIN-COUNT-2: fsub [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @sub_cc( + // NOINTRIN-COUNT-2: fsub [[T]] + // NOINTRIN: ret + // INT-LABEL: @sub_cc( + // INT-COUNT-2: sub [[T]] + // INT: ret + return a - b; +} + +T _Complex mul_rc(T a, T _Complex b) { + // INTRIN-LABEL: @mul_rc( + // INTRIN-COUNT-2: fmul [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @mul_rc( + // NOINTRIN-COUNT-2: fmul [[T]] + // NOINTRIN: ret + // INT-LABEL: @mul_rc( + // INT-COUNT-4: mul [[T]] + // INT: ret + return a * b; +} + +T _Complex mul_cr(T _Complex a, T b) { + // INTRIN-LABEL: @mul_cr( + // INTRIN-COUNT-2: fmul [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @mul_cr( + // NOINTRIN-COUNT-2: fmul [[T]] + // NOINTRIN: ret + // INT-LABEL: @mul_cr( + // INT-COUNT-4: mul [[T]] + // INT: ret + return a * b; +} + +T _Complex mul_cc(T _Complex a, T _Complex b) { + // INTRIN-LABEL: @mul_cc( + // INTRIN-NOT: fmul [[T]] + // INTRIN: call {{.*}} @llvm.experimental.complex.fmul + // INTRIN: ret + // NOINTRIN-LABEL: @mul_cc( + // NOINTRIN-COUNT-4: fmul [[T]] + // NOINTRIN: ret + // INT-LABEL: @mul_cc( + // INT-COUNT-4: mul [[T]] + // INT: ret + return a * b; +} + +T _Complex div_rc(T a, T _Complex b) { + // INTRIN-LABEL: @div_rc( + // INTRIN-NOT: fdiv [[T]] + // INTRIN: call {{.*}} @llvm.experimental.complex.fdiv + // INTRIN: ret + // NOINTRIN-LABEL: @div_rc( + // NOINTRIN: call {{.*}} @__div + // NOINTRIN: ret + // INT-LABEL: @div_rc( + // INT-COUNT-6: mul [[T]] + // INT: ret + return a / b; +} + +T _Complex div_cr(T _Complex a, T b) { + // INTRIN-LABEL: @div_cr( + // INTRIN-COUNT-2: fdiv [[T]] + // INTRIN: ret + // NOINTRIN-LABEL: @div_cr( + // NOINTRIN-COUNT-2: fdiv [[T]] + // NOINTRIN: ret + // INT-LABEL: @div_cr( + // 
INT-COUNT-5: mul [[T]] + // INT: ret + return a / b; +} + +T _Complex div_cc(T _Complex a, T _Complex b) { + // INTRIN-LABEL: @div_cc( + // INTRIN-NOT: fdiv [[T]] + // INTRIN: call {{.*}} @llvm.experimental.complex.fdiv + // INTRIN: ret + // NOINTRIN-LABEL: @div_cc( + // NOINTRIN: call {{.*}} @__div + // NOINTRIN: ret + // INT-LABEL: @div_cc( + // INT-COUNT-6: mul [[T]] + // INT: ret + return a / b; +}