Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1641,6 +1641,12 @@ If a function that has an ``sspstrong`` attribute is inlined into a function that doesn't have an ``sspstrong`` attribute, then the resulting function will have an ``sspstrong`` attribute. +``strictfp`` + This attribute indicates that the function was called from a scope that + requires strict floating point semantics. LLVM will not attempt any + optimizations that require assumptions about the floating point rounding + mode or that might alter the state of floating point status flags that + might otherwise be set or cleared by calling this function. ``"thunk"`` This attribute indicates that the function will delegate to some other function with a tail call. The prototype of a thunk should not be used for Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -558,7 +558,8 @@ ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50, ATTR_KIND_ALLOC_SIZE = 51, ATTR_KIND_WRITEONLY = 52, - ATTR_KIND_SPECULATABLE = 53 + ATTR_KIND_SPECULATABLE = 53, + ATTR_KIND_STRICT_FP = 54, }; enum ComdatSelectionKindCodes { Index: include/llvm/IR/Attributes.td =================================================================== --- include/llvm/IR/Attributes.td +++ include/llvm/IR/Attributes.td @@ -149,6 +149,9 @@ /// Strong Stack protection. def StackProtectStrong : EnumAttr<"sspstrong">; +/// Function was called in a scope requiring strict floating point semantics. +def StrictFP : EnumAttr<"strictfp">; + /// Hidden pointer to structure to return. 
def StructRet : EnumAttr<"sret">; Index: include/llvm/IR/CallSite.h =================================================================== --- include/llvm/IR/CallSite.h +++ include/llvm/IR/CallSite.h @@ -426,6 +426,11 @@ CALLSITE_DELEGATE_GETTER(isNoBuiltin()); } + /// Return true if the call requires strict floating point semantics. + bool isStrictFP() const { + CALLSITE_DELEGATE_GETTER(isStrictFP()); + } + /// Return true if the call should not be inlined. bool isNoInline() const { CALLSITE_DELEGATE_GETTER(isNoInline()); Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -1751,6 +1751,9 @@ !hasFnAttrImpl(Attribute::Builtin); } + /// Determine if the call requires strict floating point semantics. + bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); } + /// Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { @@ -3838,6 +3841,9 @@ !hasFnAttrImpl(Attribute::Builtin); } + /// Determine if the call requires strict floating point semantics. + bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); } + /// Return true if the call should not be inlined. 
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { Index: include/llvm/Transforms/Utils/SimplifyLibCalls.h =================================================================== --- include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -134,6 +134,9 @@ Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B); Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B); Value *optimizeTan(CallInst *CI, IRBuilder<> &B); + // Wrapper for all floating point library call optimizations + Value *optimizeFloatingPointLibCall(CallInst *CI, LibFunc Func, + IRBuilder<> &B); // Integer Library Call Optimizations Value *optimizeFFS(CallInst *CI, IRBuilder<> &B); Index: lib/Analysis/ConstantFolding.cpp =================================================================== --- lib/Analysis/ConstantFolding.cpp +++ lib/Analysis/ConstantFolding.cpp @@ -1359,7 +1359,7 @@ // bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { - if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: @@ -2066,7 +2066,7 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, ArrayRef Operands, const TargetLibraryInfo *TLI) { - if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return nullptr; if (!F->hasName()) return nullptr; @@ -2084,7 +2084,7 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). 
- if (CS.isNoBuiltin()) + if (CS.isNoBuiltin() || CS.isStrictFP()) return false; Function *F = CS.getCalledFunction(); if (!F) Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -654,6 +654,7 @@ KEYWORD(ssp); KEYWORD(sspreq); KEYWORD(sspstrong); + KEYWORD(strictfp); KEYWORD(safestack); KEYWORD(sanitize_address); KEYWORD(sanitize_thread); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1121,6 +1121,7 @@ B.addAttribute(Attribute::SanitizeThread); break; case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break; + case lltok::kw_strictfp: B.addAttribute(Attribute::StrictFP); break; case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break; @@ -1446,6 +1447,7 @@ case lltok::kw_sspreq: case lltok::kw_sspstrong: case lltok::kw_safestack: + case lltok::kw_strictfp: case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; @@ -1537,6 +1539,7 @@ case lltok::kw_sspreq: case lltok::kw_sspstrong: case lltok::kw_safestack: + case lltok::kw_strictfp: case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -207,6 +207,7 @@ kw_sret, kw_sanitize_thread, kw_sanitize_memory, + kw_strictfp, kw_swifterror, kw_swiftself, kw_uwtable, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1132,6 +1132,7 @@ case Attribute::SwiftError: return 1ULL << 52; case 
Attribute::WriteOnly: return 1ULL << 53; case Attribute::Speculatable: return 1ULL << 54; + case Attribute::StrictFP: return 1ULL << 55; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; @@ -1340,6 +1341,8 @@ return Attribute::StackProtectStrong; case bitc::ATTR_KIND_SAFESTACK: return Attribute::SafeStack; + case bitc::ATTR_KIND_STRICT_FP: + return Attribute::StrictFP; case bitc::ATTR_KIND_STRUCT_RET: return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -598,6 +598,8 @@ return bitc::ATTR_KIND_STACK_PROTECT_STRONG; case Attribute::SafeStack: return bitc::ATTR_KIND_SAFESTACK; + case Attribute::StrictFP: + return bitc::ATTR_KIND_STRICT_FP; case Attribute::StructRet: return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6449,10 +6449,10 @@ // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for - // some reason. + // some reason or the call site requires strict floating point semantics. 
LibFunc Func; - if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(*F, Func) && + if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && + F->hasName() && LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -327,6 +327,8 @@ return "sspstrong"; if (hasAttribute(Attribute::SafeStack)) return "safestack"; + if (hasAttribute(Attribute::StrictFP)) + return "strictfp"; if (hasAttribute(Attribute::StructRet)) return "sret"; if (hasAttribute(Attribute::SanitizeThread)) Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -1377,6 +1377,7 @@ case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::AllocSize: case Attribute::Speculatable: + case Attribute::StrictFP: return true; default: break; Index: lib/Transforms/IPO/ForceFunctionAttrs.cpp =================================================================== --- lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -57,6 +57,7 @@ .Case("ssp", Attribute::StackProtect) .Case("sspreq", Attribute::StackProtectReq) .Case("sspstrong", Attribute::StackProtectStrong) + .Case("strictfp", Attribute::StrictFP) .Case("uwtable", Attribute::UWTable) .Default(Attribute::None); } Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2043,13 +2043,103 @@ return nullptr; } +Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, + LibFunc Func, + IRBuilder<> &Builder) { + // Don't optimize calls that require strict floating point semantics. 
+ if (CI->isStrictFP()) + return nullptr; + + switch (Func) { + case LibFunc_cosf: + case LibFunc_cos: + case LibFunc_cosl: + return optimizeCos(CI, Builder); + case LibFunc_sinpif: + case LibFunc_sinpi: + case LibFunc_cospif: + case LibFunc_cospi: + return optimizeSinCosPi(CI, Builder); + case LibFunc_powf: + case LibFunc_pow: + case LibFunc_powl: + return optimizePow(CI, Builder); + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + return optimizeExp2(CI, Builder); + case LibFunc_fabsf: + case LibFunc_fabs: + case LibFunc_fabsl: + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); + case LibFunc_sqrtf: + case LibFunc_sqrt: + case LibFunc_sqrtl: + return optimizeSqrt(CI, Builder); + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log1p: + case LibFunc_log2: + case LibFunc_logb: + return optimizeLog(CI, Builder); + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + return optimizeTan(CI, Builder); + case LibFunc_ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); + case LibFunc_floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); + case LibFunc_round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); + case LibFunc_rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); + case LibFunc_trunc: + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); + case LibFunc_acos: + case LibFunc_acosh: + case LibFunc_asin: + case LibFunc_asinh: + case LibFunc_atan: + case LibFunc_atanh: + case LibFunc_cbrt: + case LibFunc_cosh: + case LibFunc_exp: + case LibFunc_exp10: + case LibFunc_expm1: + case LibFunc_sin: + case LibFunc_sinh: + case LibFunc_tanh: + if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeUnaryDoubleFP(CI, Builder, true); + return nullptr; + case LibFunc_copysign: + if (hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeBinaryDoubleFP(CI, 
Builder); + return nullptr; + case LibFunc_fminf: + case LibFunc_fmin: + case LibFunc_fminl: + case LibFunc_fmaxf: + case LibFunc_fmax: + case LibFunc_fmaxl: + return optimizeFMinFMax(CI, Builder); + default: + return nullptr; + } +} + Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + // TODO: Split out the code below that operates on FP calls so that + // we can allow non-FP calls with the StrictFP attribute to be + // optimized. if (CI->isNoBuiltin()) return nullptr; LibFunc Func; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); SmallVector OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -2057,6 +2147,8 @@ bool isCallingConvC = isCallingConvCCompatible(CI); // Command-line parameter overrides instruction attribute. + // This can't be moved to optimizeFloatingPointLibCall() because it may be + // used by the intrinsic optimizations. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; else if (isa(CI) && CI->hasUnsafeAlgebra()) @@ -2066,6 +2158,8 @@ if (IntrinsicInst *II = dyn_cast(CI)) { if (!isCallingConvC) return nullptr; + // The FP intrinsics have corresponding constrained versions so we don't + // need to check for the StrictFP attribute here. 
switch (II->getIntrinsicID()) { case Intrinsic::pow: return optimizePow(CI, Builder); @@ -2106,32 +2200,9 @@ return nullptr; if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) return V; + if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder)) + return V; switch (Func) { - case LibFunc_cosf: - case LibFunc_cos: - case LibFunc_cosl: - return optimizeCos(CI, Builder); - case LibFunc_sinpif: - case LibFunc_sinpi: - case LibFunc_cospif: - case LibFunc_cospi: - return optimizeSinCosPi(CI, Builder); - case LibFunc_powf: - case LibFunc_pow: - case LibFunc_powl: - return optimizePow(CI, Builder); - case LibFunc_exp2l: - case LibFunc_exp2: - case LibFunc_exp2f: - return optimizeExp2(CI, Builder); - case LibFunc_fabsf: - case LibFunc_fabs: - case LibFunc_fabsl: - return replaceUnaryCall(CI, Builder, Intrinsic::fabs); - case LibFunc_sqrtf: - case LibFunc_sqrt: - case LibFunc_sqrtl: - return optimizeSqrt(CI, Builder); case LibFunc_ffs: case LibFunc_ffsl: case LibFunc_ffsll: @@ -2160,18 +2231,8 @@ return optimizeFWrite(CI, Builder); case LibFunc_fputs: return optimizeFPuts(CI, Builder); - case LibFunc_log: - case LibFunc_log10: - case LibFunc_log1p: - case LibFunc_log2: - case LibFunc_logb: - return optimizeLog(CI, Builder); case LibFunc_puts: return optimizePuts(CI, Builder); - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanl: - return optimizeTan(CI, Builder); case LibFunc_perror: return optimizeErrorReporting(CI, Builder); case LibFunc_vfprintf: @@ -2179,46 +2240,6 @@ return optimizeErrorReporting(CI, Builder, 0); case LibFunc_fputc: return optimizeErrorReporting(CI, Builder, 1); - case LibFunc_ceil: - return replaceUnaryCall(CI, Builder, Intrinsic::ceil); - case LibFunc_floor: - return replaceUnaryCall(CI, Builder, Intrinsic::floor); - case LibFunc_round: - return replaceUnaryCall(CI, Builder, Intrinsic::round); - case LibFunc_nearbyint: - return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); - case LibFunc_rint: - return replaceUnaryCall(CI, 
Builder, Intrinsic::rint); - case LibFunc_trunc: - return replaceUnaryCall(CI, Builder, Intrinsic::trunc); - case LibFunc_acos: - case LibFunc_acosh: - case LibFunc_asin: - case LibFunc_asinh: - case LibFunc_atan: - case LibFunc_atanh: - case LibFunc_cbrt: - case LibFunc_cosh: - case LibFunc_exp: - case LibFunc_exp10: - case LibFunc_expm1: - case LibFunc_sin: - case LibFunc_sinh: - case LibFunc_tanh: - if (UnsafeFPShrink && hasFloatVersion(FuncName)) - return optimizeUnaryDoubleFP(CI, Builder, true); - return nullptr; - case LibFunc_copysign: - if (hasFloatVersion(FuncName)) - return optimizeBinaryDoubleFP(CI, Builder); - return nullptr; - case LibFunc_fminf: - case LibFunc_fmin: - case LibFunc_fminl: - case LibFunc_fmaxf: - case LibFunc_fmax: - case LibFunc_fmaxl: - return optimizeFMinFMax(CI, Builder); default: return nullptr; } Index: test/Bitcode/compatibility.ll =================================================================== --- test/Bitcode/compatibility.ll +++ test/Bitcode/compatibility.ll @@ -608,6 +608,7 @@ ; CHECK: declare void @f.inaccessiblememonly() #33 declare void @f.inaccessiblemem_or_argmemonly() inaccessiblemem_or_argmemonly ; CHECK: declare void @f.inaccessiblemem_or_argmemonly() #34 +declare void @f.strictfp() #35 ; Functions -- section declare void @f.section() section "80" @@ -1252,6 +1253,9 @@ call void @f.nobuiltin() builtin ; CHECK: call void @f.nobuiltin() #42 + call void @f.strictfp() strictfp + ; CHECK: call void @f.strictfp() #43 + call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 tail call ghccc nonnull i32* @f.nonnull() minsize @@ -1670,6 +1674,7 @@ ; CHECK: attributes #40 = { writeonly } ; CHECK: attributes #41 = { speculatable } ; CHECK: attributes #42 = { builtin } +; CHECK: attributes #43 = { strictfp } ;; Metadata Index: test/Transforms/DCE/calls-errno.ll =================================================================== --- test/Transforms/DCE/calls-errno.ll +++ 
test/Transforms/DCE/calls-errno.ll @@ -76,6 +76,10 @@ ; CHECK-NEXT: %cos3 = call double @cos(double 0.000000e+00) %cos3 = call double @cos(double 0.000000e+00) nobuiltin +; cos(1) strictfp sets FP status flags +; CHECK-NEXT: %cos4 = call double @cos(double 1.000000e+00) + %cos4 = call double @cos(double 1.000000e+00) strictfp + ; pow(0, 1) is 0 %pow1 = call double @pow(double 0x7FF0000000000000, double 1.000000e+00) Index: test/Transforms/InstCombine/constant-fold-libfunc.ll =================================================================== --- test/Transforms/InstCombine/constant-fold-libfunc.ll +++ test/Transforms/InstCombine/constant-fold-libfunc.ll @@ -12,9 +12,20 @@ ret double %pi } +; Check that we don't constant fold calls marked nobuiltin. + define double @test_acos_nobuiltin() { ; CHECK-LABEL: @test_acos_nobuiltin %pi = call double @acos(double -1.000000e+00) nobuiltin ; CHECK: call double @acos(double -1.000000e+00) ret double %pi } + +; Check that we don't constant fold strictfp results that require rounding. + +define double @test_acos_strictfp() { +; CHECK-LABEL: @test_acos_strictfp + %pi = call double @acos(double -1.000000e+00) strictfp +; CHECK: call double @acos(double -1.000000e+00) + ret double %pi +} Index: test/Transforms/InstCombine/memcpy-1.ll =================================================================== --- test/Transforms/InstCombine/memcpy-1.ll +++ test/Transforms/InstCombine/memcpy-1.ll @@ -15,3 +15,13 @@ ret i8* %ret ; CHECK: ret i8* %mem1 } + +; Verify that the strictfp attr doesn't block this optimization. + +define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) { +; CHECK-LABEL: @test_simplify2( + %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) strictfp +; CHECK: call void @llvm.memcpy + ret i8* %ret +; CHECK: ret i8* %mem1 +}