Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -142,6 +142,7 @@
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
 void initializeFlattenCFGPassPass(PassRegistry&);
 void initializeFloat2IntLegacyPassPass(PassRegistry&);
+void initializeForceConstrainedFPPassPass(PassRegistry&);
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeForwardControlFlowIntegrityPass(PassRegistry&);
 void initializeFuncletLayoutPass(PassRegistry&);
Index: include/llvm/Transforms/IPO.h
===================================================================
--- include/llvm/Transforms/IPO.h
+++ include/llvm/Transforms/IPO.h
@@ -275,6 +275,9 @@
 ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
                                           raw_ostream *ThinLinkOS = nullptr);
 
+/// Experimental pass to force all FP ops to constrained intrinsics.
+ModulePass *createForceConstrainedFPPass();
+
 } // End llvm namespace
 
 #endif
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -9,6 +9,7 @@
   DeadArgumentElimination.cpp
   ElimAvailExtern.cpp
   ExtractGV.cpp
+  ForceConstrainedFP.cpp
   ForceFunctionAttrs.cpp
   FunctionAttrs.cpp
   FunctionImport.cpp
Index: lib/Transforms/IPO/ForceConstrainedFP.cpp
===================================================================
--- lib/Transforms/IPO/ForceConstrainedFP.cpp
+++ lib/Transforms/IPO/ForceConstrainedFP.cpp
@@ -0,0 +1,205 @@
+//===----- ForceConstrainedFP.cpp - Convert all FP ops to constrained -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a pass that converts all floating point operations to
+/// the equivalent constrained intrinsic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "force-constrained-fp"
+
+STATISTIC(NumFPOpsConverted, "Number of FP ops converted to intrinsics");
+
+namespace {
+
+/// Module pass that rewrites FP binary operators and selected FP intrinsic
+/// calls into calls to the matching llvm.experimental.constrained.* intrinsic.
+class ForceConstrainedFPPass : public ModulePass {
+public:
+  static char ID;
+
+  ForceConstrainedFPPass() : ModulePass(ID) {
+    initializeForceConstrainedFPPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // Instructions are replaced, so only the CFG is guaranteed to be intact;
+    // analyses that cache per-instruction results must be recomputed.
+    AU.setPreservesCFG();
+  }
+
+  bool runOnModule(Module &M) override;
+
+private:
+  /// Convert \p IntrinCall if it is a supported FP intrinsic. Returns true if
+  /// the call was replaced (and erased).
+  bool processIntrinsicCall(IntrinsicInst *IntrinCall);
+
+  /// Convert \p BinOp if it is an FP arithmetic operator. Returns true if the
+  /// instruction was replaced (and erased).
+  bool processBinaryOperator(BinaryOperator *BinOp);
+
+  /// Replace \p I with a call to the constrained intrinsic \p IID taking
+  /// \p Operands followed by the rounding mode and exception behavior
+  /// metadata arguments, then erase \p I.
+  void replaceFPInstrWithIntrinsic(Instruction *I, Intrinsic::ID IID,
+                                   ArrayRef<Value *> Operands);
+};
+
+bool ForceConstrainedFPPass::runOnModule(Module &M) {
+  bool Changed = false;
+
+  for (auto &F : M.functions()) {
+    for (auto &BB : F) {
+      // Because we're erasing instructions, we need to pre-increment
+      // the iterator.
+      auto It = BB.begin();
+      while (It != BB.end()) {
+        Instruction &I = *It;
+        ++It;
+        if (auto *BinOp = dyn_cast<BinaryOperator>(&I))
+          Changed |= processBinaryOperator(BinOp);
+        else if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&I))
+          Changed |= processIntrinsicCall(IntrinCall);
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool ForceConstrainedFPPass::processBinaryOperator(BinaryOperator *BinOp) {
+  Intrinsic::ID IID;
+  switch (BinOp->getOpcode()) {
+  default:
+    return false;
+  case Instruction::FAdd:
+    IID = Intrinsic::experimental_constrained_fadd;
+    break;
+  case Instruction::FSub:
+    IID = Intrinsic::experimental_constrained_fsub;
+    break;
+  case Instruction::FMul:
+    IID = Intrinsic::experimental_constrained_fmul;
+    break;
+  case Instruction::FDiv:
+    IID = Intrinsic::experimental_constrained_fdiv;
+    break;
+  case Instruction::FRem:
+    IID = Intrinsic::experimental_constrained_frem;
+    break;
+  }
+  replaceFPInstrWithIntrinsic(BinOp, IID,
+                              {BinOp->getOperand(0), BinOp->getOperand(1)});
+  return true;
+}
+
+bool ForceConstrainedFPPass::processIntrinsicCall(IntrinsicInst *IntrinCall) {
+  Intrinsic::ID IID;
+  switch (IntrinCall->getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::fma:
+    IID = Intrinsic::experimental_constrained_fma;
+    break;
+  case Intrinsic::sqrt:
+    IID = Intrinsic::experimental_constrained_sqrt;
+    break;
+  case Intrinsic::pow:
+    IID = Intrinsic::experimental_constrained_pow;
+    break;
+  case Intrinsic::powi:
+    IID = Intrinsic::experimental_constrained_powi;
+    break;
+  case Intrinsic::sin:
+    IID = Intrinsic::experimental_constrained_sin;
+    break;
+  case Intrinsic::cos:
+    IID = Intrinsic::experimental_constrained_cos;
+    break;
+  case Intrinsic::exp:
+    IID = Intrinsic::experimental_constrained_exp;
+    break;
+  case Intrinsic::exp2:
+    IID = Intrinsic::experimental_constrained_exp2;
+    break;
+  case Intrinsic::log:
+    IID = Intrinsic::experimental_constrained_log;
+    break;
+  case Intrinsic::log10:
+    IID = Intrinsic::experimental_constrained_log10;
+    break;
+  case Intrinsic::log2:
+    IID = Intrinsic::experimental_constrained_log2;
+    break;
+  case Intrinsic::rint:
+    IID = Intrinsic::experimental_constrained_rint;
+    break;
+  case Intrinsic::nearbyint:
+    IID = Intrinsic::experimental_constrained_nearbyint;
+    break;
+  }
+  SmallVector<Value *, 4> Args(IntrinCall->arg_operands());
+  replaceFPInstrWithIntrinsic(IntrinCall, IID, Args);
+  return true;
+}
+
+void ForceConstrainedFPPass::replaceFPInstrWithIntrinsic(
+    Instruction *I, Intrinsic::ID IID, ArrayRef<Value *> Operands) {
+  Module *M = I->getModule();
+  LLVMContext &Context = I->getContext();
+  Value *RoundingMode =
+      MetadataAsValue::get(Context, MDString::get(Context, "round.dynamic"));
+  Value *ExBehavior =
+      MetadataAsValue::get(Context, MDString::get(Context, "fpexcept.strict"));
+  Function *IntrinFn = Intrinsic::getDeclaration(M, IID, I->getType());
+
+  // The constrained intrinsics take the original operands followed by the
+  // rounding mode and exception behavior metadata.
+  SmallVector<Value *, 5> Args(Operands.begin(), Operands.end());
+  Args.push_back(RoundingMode);
+  Args.push_back(ExBehavior);
+
+  auto *FPOp = CallInst::Create(IntrinFn, Args, I->getName() + ".strict", I);
+  // Keep the source location so debug info survives the rewrite.
+  FPOp->setDebugLoc(I->getDebugLoc());
+
+  DEBUG(dbgs() << "ForceConstrainedFP: Replacing:\n " << *I
+               << "\n with:\n " << *FPOp << "\n");
+  ++NumFPOpsConverted;
+
+  I->replaceAllUsesWith(FPOp);
+  I->eraseFromParent();
+}
+
+} // End anonymous namespace
+
+char ForceConstrainedFPPass::ID = 0;
+INITIALIZE_PASS(ForceConstrainedFPPass, "force-constrained-fp",
+                "Force constrained floating point", false, false)
+
+ModulePass *llvm::createForceConstrainedFPPass() {
+  return new ForceConstrainedFPPass();
+}
Index: lib/Transforms/IPO/IPO.cpp
===================================================================
--- lib/Transforms/IPO/IPO.cpp
+++ lib/Transforms/IPO/IPO.cpp
@@ -30,6 +30,7 @@
   initializeCrossDSOCFIPass(Registry);
   initializeDAEPass(Registry);
   initializeDAHPass(Registry);
+  initializeForceConstrainedFPPassPass(Registry);
   initializeForceFunctionAttrsLegacyPassPass(Registry);
   initializeGlobalDCELegacyPassPass(Registry);
   initializeGlobalOptLegacyPassPass(Registry);
Index:
lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -144,6 +144,10 @@
     "enable-gvn-sink", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN sinking pass (default = off)"));
 
+static cl::opt<bool> ForceConstrainedFP(
+    "convert-fp-to-constrained", cl::init(false), cl::Hidden,
+    cl::desc("Convert all FP ops to constrained intrinsics"));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -400,6 +404,10 @@
   // Allow forcing function attributes as a debugging and tuning aid.
   MPM.add(createForceFunctionAttrsLegacyPass());
 
+  // Experimental pass to exercise the constrained FP intrinsics.
+  if (ForceConstrainedFP)
+    MPM.add(createForceConstrainedFPPass());
+
   // If all optimizations are disabled, just run the always-inline pass and,
   // if enabled, the function merging pass.
   if (OptLevel == 0) {
Index: test/Transforms/force-constrained-fp.ll
===================================================================
--- test/Transforms/force-constrained-fp.ll
+++ test/Transforms/force-constrained-fp.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -force-constrained-fp -S 2>&1 | FileCheck %s
+
+define void @test(float %f, double %d) {
+; CHECK-LABEL: @test(
+  %t1 = fadd float %f, %f
+; CHECK: %t1.strict = call float @llvm.experimental.constrained.fadd.f32(
+; CHECK-SAME: float %f, float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t2 = call double @llvm.sqrt.f64(double %d)
+; CHECK: %t2.strict = call double @llvm.experimental.constrained.sqrt.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t3 = fsub double %t2, %d
+; CHECK: %t3.strict = call double @llvm.experimental.constrained.fsub.f64(
+; CHECK-SAME: double %t2.strict, double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t4 = fmul float %t1, %f
+; CHECK: %t4.strict = call float @llvm.experimental.constrained.fmul.f32(
+; CHECK-SAME: float %t1.strict, float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t5 = fdiv double %t2, %t3
+; CHECK: %t5.strict = call double @llvm.experimental.constrained.fdiv.f64(
+; CHECK-SAME: double %t2.strict, double %t3.strict,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t6 = frem double %t2, %t3
+; CHECK: %t6.strict = call double @llvm.experimental.constrained.frem.f64(
+; CHECK-SAME: double %t2.strict, double %t3.strict,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t7 = call float @llvm.fma.f32(float %t1, float %f, float %t4)
+; CHECK: %t7.strict = call float @llvm.experimental.constrained.fma.f32(
+; CHECK-SAME: float %t1.strict, float %f, float %t4.strict,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t8 = call double @llvm.pow.f64(double %d, double %t3)
+; CHECK: %t8.strict = call double @llvm.experimental.constrained.pow.f64(
+; CHECK-SAME: double %d, double %t3.strict,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t9 = call float @llvm.powi.f32(float %f, i32 4)
+; CHECK: %t9.strict = call float @llvm.experimental.constrained.powi.f32(
+; CHECK-SAME: float %f, i32 4,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t10 = call double @llvm.sin.f64(double %d)
+; CHECK: %t10.strict = call double @llvm.experimental.constrained.sin.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t11 = call double @llvm.cos.f64(double %d)
+; CHECK: %t11.strict = call double @llvm.experimental.constrained.cos.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t12 = call float @llvm.exp.f32(float %f)
+; CHECK: %t12.strict = call float @llvm.experimental.constrained.exp.f32(
+; CHECK-SAME: float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t13 = call float @llvm.exp2.f32(float %f)
+; CHECK: %t13.strict = call float @llvm.experimental.constrained.exp2.f32(
+; CHECK-SAME: float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t14 = call double @llvm.log.f64(double %d)
+; CHECK: %t14.strict = call double @llvm.experimental.constrained.log.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t15 = call double @llvm.log10.f64(double %d)
+; CHECK: %t15.strict = call double @llvm.experimental.constrained.log10.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t16 = call double @llvm.log2.f64(double %d)
+; CHECK: %t16.strict = call double @llvm.experimental.constrained.log2.f64(
+; CHECK-SAME: double %d,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t17 = call float @llvm.rint.f32(float %f)
+; CHECK: %t17.strict = call float @llvm.experimental.constrained.rint.f32(
+; CHECK-SAME: float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %t18 = call float @llvm.nearbyint.f32(float %f)
+; CHECK: %t18.strict = call float @llvm.experimental.constrained.nearbyint.f32(
+; CHECK-SAME: float %f,
+; CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret void
+}
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.sin.f64(double)
+declare double @llvm.cos.f64(double)
+declare float @llvm.exp.f32(float)
+declare float @llvm.exp2.f32(float)
+declare double @llvm.log.f64(double)
+declare double @llvm.log10.f64(double)
+declare double @llvm.log2.f64(double)
+declare float @llvm.rint.f32(float)
+declare float @llvm.nearbyint.f32(float)