Index: cfe/trunk/lib/CodeGen/CGExprComplex.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGExprComplex.cpp
+++ cfe/trunk/lib/CodeGen/CGExprComplex.cpp
@@ -15,9 +15,13 @@
 #include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/StmtVisitor.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
 #include <algorithm>
 using namespace clang;
 using namespace CodeGen;
@@ -587,11 +591,40 @@
   return CGF.EmitCall(FuncInfo, Func, ReturnValueSlot(), Args).getComplexVal();
 }
 
+/// \brief Lookup the libcall name for a given floating point type complex
+/// multiply.
+///
+/// \param Ty the floating point type of a single component of the complex
+/// operands; only its type ID is inspected.
+/// \returns the name of the runtime multiply routine for that type, e.g.
+/// "__mulsc3" for float. Reaching the default case is treated as a bug.
+static StringRef getComplexMultiplyLibCallName(llvm::Type *Ty) {
+  switch (Ty->getTypeID()) {
+  default:
+    llvm_unreachable("Unsupported floating point type!");
+  case llvm::Type::HalfTyID:
+    return "__mulhc3";
+  case llvm::Type::FloatTyID:
+    return "__mulsc3";
+  case llvm::Type::DoubleTyID:
+    return "__muldc3";
+  case llvm::Type::PPC_FP128TyID:
+    return "__multc3";
+  case llvm::Type::X86_FP80TyID:
+    return "__mulxc3";
+  case llvm::Type::FP128TyID:
+    // IEEE quad precision uses the same routine name as PPC double-double;
+    // without this case an fp128 operand falls into the default above.
+    return "__multc3";
+  }
+}
+
 // See C11 Annex G.5.1 for the semantics of multiplicative operators on complex
 // typed values.
 ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) {
   using llvm::Value;
   Value *ResR, *ResI;
+  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
 
   if (Op.LHS.first->getType()->isFloatingPointTy()) {
     // The general formulation is:
@@ -603,23 +636,65 @@
     // still more of this within the type system.
 
     if (Op.LHS.second && Op.RHS.second) {
-      // If both operands are complex, delegate to a libcall which works to
-      // prevent underflow and overflow.
- StringRef LibCallName; - switch (Op.LHS.first->getType()->getTypeID()) { - default: - llvm_unreachable("Unsupported floating point type!"); - case llvm::Type::HalfTyID: - return EmitComplexBinOpLibCall("__mulhc3", Op); - case llvm::Type::FloatTyID: - return EmitComplexBinOpLibCall("__mulsc3", Op); - case llvm::Type::DoubleTyID: - return EmitComplexBinOpLibCall("__muldc3", Op); - case llvm::Type::PPC_FP128TyID: - return EmitComplexBinOpLibCall("__multc3", Op); - case llvm::Type::X86_FP80TyID: - return EmitComplexBinOpLibCall("__mulxc3", Op); - } + // If both operands are complex, emit the core math directly, and then + // test for NaNs. If we find NaNs in the result, we delegate to a libcall + // to carefully re-compute the correct infinity representation if + // possible. The expectation is that the presence of NaNs here is + // *extremely* rare, and so the cost of the libcall is almost irrelevant. + // This is good, because the libcall re-computes the core multiplication + // exactly the same as we do here and re-tests for NaNs in order to be + // a generic complex*complex libcall. + + // First compute the four products. + Value *AC = Builder.CreateFMul(Op.LHS.first, Op.RHS.first, "mul_ac"); + Value *BD = Builder.CreateFMul(Op.LHS.second, Op.RHS.second, "mul_bd"); + Value *AD = Builder.CreateFMul(Op.LHS.first, Op.RHS.second, "mul_ad"); + Value *BC = Builder.CreateFMul(Op.LHS.second, Op.RHS.first, "mul_bc"); + + // The real part is the difference of the first two, the imaginary part is + // the sum of the second. + ResR = Builder.CreateFSub(AC, BD, "mul_r"); + ResI = Builder.CreateFAdd(AD, BC, "mul_i"); + + // Emit the test for the real part becoming NaN and create a branch to + // handle it. We test for NaN by comparing the number to itself. 
+ Value *IsRNaN = Builder.CreateFCmpUNO(ResR, ResR, "isnan_cmp"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock("complex_mul_cont"); + llvm::BasicBlock *INaNBB = CGF.createBasicBlock("complex_mul_imag_nan"); + llvm::Instruction *Branch = Builder.CreateCondBr(IsRNaN, INaNBB, ContBB); + llvm::BasicBlock *OrigBB = Branch->getParent(); + + // Give hint that we very much don't expect to see NaNs. + // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp + llvm::MDNode *BrWeight = MDHelper.createBranchWeights(1, (1U << 20) - 1); + Branch->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); + + // Now test the imaginary part and create its branch. + CGF.EmitBlock(INaNBB); + Value *IsINaN = Builder.CreateFCmpUNO(ResI, ResI, "isnan_cmp"); + llvm::BasicBlock *LibCallBB = CGF.createBasicBlock("complex_mul_libcall"); + Branch = Builder.CreateCondBr(IsINaN, LibCallBB, ContBB); + Branch->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); + + // Now emit the libcall on this slowest of the slow paths. + CGF.EmitBlock(LibCallBB); + Value *LibCallR, *LibCallI; + std::tie(LibCallR, LibCallI) = EmitComplexBinOpLibCall( + getComplexMultiplyLibCallName(Op.LHS.first->getType()), Op); + Builder.CreateBr(ContBB); + + // Finally continue execution by phi-ing together the different + // computation paths. 
+ CGF.EmitBlock(ContBB); + llvm::PHINode *RealPHI = Builder.CreatePHI(ResR->getType(), 3, "real_mul_phi"); + RealPHI->addIncoming(ResR, OrigBB); + RealPHI->addIncoming(ResR, INaNBB); + RealPHI->addIncoming(LibCallR, LibCallBB); + llvm::PHINode *ImagPHI = Builder.CreatePHI(ResI->getType(), 3, "imag_mul_phi"); + ImagPHI->addIncoming(ResI, OrigBB); + ImagPHI->addIncoming(ResI, INaNBB); + ImagPHI->addIncoming(LibCallI, LibCallBB); + return ComplexPairTy(RealPHI, ImagPHI); } assert((Op.LHS.second || Op.RHS.second) && "At least one operand must be complex!"); Index: cfe/trunk/test/CodeGen/complex-math.c =================================================================== --- cfe/trunk/test/CodeGen/complex-math.c +++ cfe/trunk/test/CodeGen/complex-math.c @@ -89,7 +89,17 @@ } float _Complex mul_float_cc(float _Complex a, float _Complex b) { // X86-LABEL: @mul_float_cc( - // X86-NOT: fmul + // X86: %[[AC:[^ ]+]] = fmul + // X86: %[[BD:[^ ]+]] = fmul + // X86: %[[AD:[^ ]+]] = fmul + // X86: %[[BC:[^ ]+]] = fmul + // X86: %[[RR:[^ ]+]] = fsub float %[[AC]], %[[BD]] + // X86: %[[RI:[^ ]+]] = fadd float + // X86-DAG: %[[AD]] + // X86-DAG: , + // X86-DAG: %[[BC]] + // X86: fcmp uno float %[[RR]] + // X86: fcmp uno float %[[RI]] // X86: call {{.*}} @__mulsc3( // X86: ret return a * b; @@ -211,7 +221,17 @@ } double _Complex mul_double_cc(double _Complex a, double _Complex b) { // X86-LABEL: @mul_double_cc( - // X86-NOT: fmul + // X86: %[[AC:[^ ]+]] = fmul + // X86: %[[BD:[^ ]+]] = fmul + // X86: %[[AD:[^ ]+]] = fmul + // X86: %[[BC:[^ ]+]] = fmul + // X86: %[[RR:[^ ]+]] = fsub double %[[AC]], %[[BD]] + // X86: %[[RI:[^ ]+]] = fadd double + // X86-DAG: %[[AD]] + // X86-DAG: , + // X86-DAG: %[[BC]] + // X86: fcmp uno double %[[RR]] + // X86: fcmp uno double %[[RI]] // X86: call {{.*}} @__muldc3( // X86: ret return a * b; @@ -333,11 +353,31 @@ } long double _Complex mul_long_double_cc(long double _Complex a, long double _Complex b) { // X86-LABEL: @mul_long_double_cc( - // X86-NOT: 
fmul + // X86: %[[AC:[^ ]+]] = fmul + // X86: %[[BD:[^ ]+]] = fmul + // X86: %[[AD:[^ ]+]] = fmul + // X86: %[[BC:[^ ]+]] = fmul + // X86: %[[RR:[^ ]+]] = fsub x86_fp80 %[[AC]], %[[BD]] + // X86: %[[RI:[^ ]+]] = fadd x86_fp80 + // X86-DAG: %[[AD]] + // X86-DAG: , + // X86-DAG: %[[BC]] + // X86: fcmp uno x86_fp80 %[[RR]] + // X86: fcmp uno x86_fp80 %[[RI]] // X86: call {{.*}} @__mulxc3( // X86: ret // PPC-LABEL: @mul_long_double_cc( - // PPC-NOT: fmul + // PPC: %[[AC:[^ ]+]] = fmul + // PPC: %[[BD:[^ ]+]] = fmul + // PPC: %[[AD:[^ ]+]] = fmul + // PPC: %[[BC:[^ ]+]] = fmul + // PPC: %[[RR:[^ ]+]] = fsub ppc_fp128 %[[AC]], %[[BD]] + // PPC: %[[RI:[^ ]+]] = fadd ppc_fp128 + // PPC-DAG: %[[AD]] + // PPC-DAG: , + // PPC-DAG: %[[BC]] + // PPC: fcmp uno ppc_fp128 %[[RR]] + // PPC: fcmp uno ppc_fp128 %[[RI]] // PPC: call {{.*}} @__multc3( // PPC: ret return a * b;