diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -96,8 +96,14 @@
 /// Struct representing the available values in the scoped hash table.
 struct SimpleValue {
   Instruction *Inst;
+  unsigned StrictFPGeneration;
 
-  SimpleValue(Instruction *I) : Inst(I) {
+  SimpleValue(Instruction *I) : Inst(I), StrictFPGeneration(0) {
+    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+
+  SimpleValue(Instruction *I, unsigned StrictFPGeneration)
+      : Inst(I), StrictFPGeneration(StrictFPGeneration) {
     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
   }
 
@@ -125,7 +131,9 @@
       case Intrinsic::experimental_constrained_fcmp:
       case Intrinsic::experimental_constrained_fcmps: {
         auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
-        return CFP->isDefaultFPEnvironment();
+        if (CFP->getExceptionBehavior().getValue() == fp::ebStrict)
+          return false;
+        return true;
       }
     }
   }
@@ -138,6 +146,32 @@
            isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
            isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
   }
+
+  static bool canHandle(Instruction *Inst,
+                        enum fp::ExceptionBehavior MaxExceptBehavior) {
+    bool isOK = canHandle(Inst);
+    if (isOK) {
+      if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(Inst)) {
+        Optional<fp::ExceptionBehavior> InstExceptBehavior =
+            FPI->getExceptionBehavior();
+        if (InstExceptBehavior) {
+          switch (InstExceptBehavior.getValue()) {
+          case fp::ebIgnore:
+            // The weakest value is always fine. It is equivalent to a
+            // normal FP instruction.
+            break;
+          case fp::ebMayTrap:
+            isOK = (MaxExceptBehavior == fp::ebMayTrap ||
+                    MaxExceptBehavior == fp::ebStrict);
+            break;
+          case fp::ebStrict:
+            isOK = (MaxExceptBehavior == fp::ebStrict);
+            break;
+          }
+        }
+      }
+    }
+    return isOK;
+  }
 };
 
 } // end anonymous namespace
 
@@ -331,6 +365,9 @@
   if (LHS.isSentinel() || RHS.isSentinel())
     return LHSI == RHSI;
 
+  // Instructions that are not constrained FP intrinsics always have a
+  // StrictFPGeneration of zero.
+  if (LHS.StrictFPGeneration != RHS.StrictFPGeneration)
+    return false;
   if (LHSI->getOpcode() != RHSI->getOpcode())
     return false;
   if (LHSI->isIdenticalToWhenDefined(RHSI))
 
@@ -360,6 +397,7 @@
   }
 
   // TODO: Extend this for >2 args by matching the trailing N-2 args.
+  // TODO: Constrained FP intrinsics require matching the first two args.
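+  // Example (a sketch of the intended semantics, not code from this patch;
+  // the value names are illustrative): two constrained calls can only match
+  // when both trailing metadata operands agree, so
+  //   %x = call double @llvm.experimental.constrained.fadd.f64(double %a,
+  //            double %b, metadata !"round.tonearest",
+  //            metadata !"fpexcept.maytrap")
+  // matches an identical call, but not one using !"round.dynamic" or a
+  // different exception behavior.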
   auto *LII = dyn_cast<IntrinsicInst>(LHSI);
   auto *RII = dyn_cast<IntrinsicInst>(RHSI);
   if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
 
@@ -524,6 +562,7 @@
   const SimplifyQuery SQ;
   MemorySSA *MSSA;
   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
+  bool isFunctionStrictFP;
 
   using AllocatorTy =
       RecyclingAllocator<BumpPtrAllocator,
                          ScopedHashTableVal<SimpleValue, Value *>>;
 
-           AssumptionCache &AC, MemorySSA *MSSA)
+           AssumptionCache &AC, MemorySSA *MSSA, bool isFunctionStrictFP)
       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
-        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
+        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)),
+        isFunctionStrictFP(isFunctionStrictFP) {}
 
   bool run();
 
@@ -636,13 +679,13 @@
 public:
   StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
             InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
-            unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
+            unsigned cg, unsigned sfpcg, DomTreeNode *n,
+            DomTreeNode::const_iterator child,
             DomTreeNode::const_iterator end)
-      : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
-        EndIter(end),
-        Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
-               AvailableCalls)
-      {}
+      : CurrentGeneration(cg), ChildGeneration(cg), StrictFPGeneration(sfpcg),
+        ChildStrictFPGeneration(sfpcg), Node(n), ChildIter(child),
+        EndIter(end), Scopes(AvailableValues, AvailableLoads,
+                             AvailableInvariants, AvailableCalls) {}
 
   StackNode(const StackNode &) = delete;
   StackNode &operator=(const StackNode &) = delete;
 
@@ -650,6 +693,11 @@
   unsigned currentGeneration() const { return CurrentGeneration; }
   unsigned childGeneration() const { return ChildGeneration; }
   void childGeneration(unsigned generation) { ChildGeneration = generation; }
+  unsigned strictFPGeneration() const { return StrictFPGeneration; }
+  unsigned childStrictFPGeneration() const { return ChildStrictFPGeneration; }
+  void childStrictFPGeneration(unsigned generation) {
+    ChildStrictFPGeneration = generation;
+  }
   DomTreeNode *node() { return Node; }
   DomTreeNode::const_iterator childIter() const { return ChildIter; }
 
@@ -666,6 +714,8 @@
 private:
   unsigned CurrentGeneration;
   unsigned ChildGeneration;
+  unsigned StrictFPGeneration;
+  unsigned ChildStrictFPGeneration;
   DomTreeNode *Node;
   DomTreeNode::const_iterator ChildIter;
   DomTreeNode::const_iterator EndIter;
 
@@ -821,7 +871,8 @@
                              const BasicBlock *BB, const BasicBlock *Pred);
 
   Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
-                          unsigned CurrentGeneration);
+                          unsigned CurrentGeneration,
+                          unsigned StrictFPGeneration);
 
   bool overridingStores(const ParseMemoryInst &Earlier,
                         const ParseMemoryInst &Later);
 
@@ -856,6 +907,13 @@
   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                            Instruction *EarlierInst, Instruction *LaterInst);
 
+  bool isSameStrictFPGeneration(unsigned EarlierGeneration,
+                                unsigned LaterGeneration,
+                                Instruction *EarlierInst,
+                                Instruction *LaterInst);
+
+  unsigned getStrictFPGeneration(Instruction *I);
+
   bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
                                  const IntrinsicInst *Later) {
     auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
 
@@ -1023,6 +1081,29 @@
   return MSSA->dominates(LaterDef, EarlierMA);
 }
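+// Generation sketch (assumed semantics, consistent with the tests added
+// below; @side_effects is a placeholder name): every call or invoke bumps
+// StrictFPGeneration, so in
+//   %1 = call double @llvm.experimental.constrained.fadd.f64(...)  ; gen 1
+//   call void @side_effects()                                      ; gen 1 -> 2
+//   %2 = call double @llvm.experimental.constrained.fadd.f64(...)  ; gen 2
+// %1 and %2 carry different generations and are not merged unless the
+// operation is independent of the dynamic FP environment.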
+/// Determine whether the two instructions lie in a region where CSE is
+/// safe: before any function call, just before a return, or between two
+/// adjacent function calls. Two adjacent calls may be in different basic
+/// blocks only if there is exactly one path from the first call to the
+/// second.
+bool EarlyCSE::isSameStrictFPGeneration(unsigned EarlierGeneration,
+                                        unsigned LaterGeneration,
+                                        Instruction *EarlierInst,
+                                        Instruction *LaterInst) {
+  return (EarlierGeneration == LaterGeneration);
+}
+
+/// Return the value used for isSameStrictFPGeneration(). For instructions
+/// where strictfp does not matter, return zero.
+unsigned EarlyCSE::getStrictFPGeneration(Instruction *I) {
+  if (ConstrainedFPIntrinsic *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+    if (CI->isDefaultFPEnvironment())
+      return 0;
+    return StrictFPGeneration;
+  }
+  return 0;
+}
+
 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
   // A location loaded from with an invariant_load is assumed to *never* change
   // within the visible scope of the compilation.
 
@@ -1103,7 +1184,8 @@
 }
 
 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
-                                  unsigned CurrentGeneration) {
+                                  unsigned CurrentGeneration,
+                                  unsigned StrictFPGeneration) {
   if (InVal.DefInst == nullptr)
     return nullptr;
   if (InVal.MatchingId != MemInst.getMatchingId())
 
@@ -1192,8 +1274,12 @@
   // have invalidated the live-out memory values of our parent value. For now,
   // just be conservative and invalidate memory if this block has multiple
   // predecessors.
-  if (!BB->getSinglePredecessor())
+  if (!BB->getSinglePredecessor()) {
     ++CurrentGeneration;
+    if (StrictFPGeneration > 0) {
+      ++StrictFPGeneration;
+    }
+  }
 
   // If this node has a single predecessor which ends in a conditional branch,
   // we can infer the value of the branch condition given that we took this
 
@@ -1351,34 +1437,61 @@
     }
 
     // If this is a simple instruction that we can value number, process it.
+    Value *DeferredElideWith = nullptr;
    if (SimpleValue::canHandle(&Inst)) {
+      bool ConstrainedFPMayWriteMemory = false;
+      if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
+        assert(CI->getExceptionBehavior().getValue() != fp::ebStrict &&
+               "Unexpected ebStrict from SimpleValue::canHandle()");
+        if (CI->getExceptionBehavior().getValue() == fp::ebMayTrap) {
+          // Can write to memory in, for example, an exception handler.
+          ConstrainedFPMayWriteMemory = true;
+        }
+      }
       // See if the instruction has an available value. If so, use it.
-      if (Value *V = AvailableValues.lookup(&Inst)) {
-        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
-                          << '\n');
+      if (Value *V = AvailableValues.lookup(
+              SimpleValue(&Inst, getStrictFPGeneration(&Inst)))) {
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
+        if (ConstrainedFPMayWriteMemory) {
+          DeferredElideWith = V;
+          LLVM_DEBUG(dbgs() << "EarlyCSE CSE Deferred: " << *V << " for "
+                            << Inst << '\n');
+        } else {
+          LLVM_DEBUG(dbgs()
+                     << "EarlyCSE CSE: " << Inst << " to: " << *V << '\n');
+          if (auto *I = dyn_cast<Instruction>(V))
+            I->andIRFlags(&Inst);
+          Inst.replaceAllUsesWith(V);
+          salvageKnowledge(&Inst, &AC);
+          removeMSSA(Inst);
+          Inst.eraseFromParent();
+          Changed = true;
+          ++NumCSE;
+          continue;
+        }
      }
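+      // Rationale sketch (an assumption spelled out from the comment above):
+      // a "maytrap" intrinsic may write memory, e.g. in an exception
+      // handler, so it must still flow through the load/store bookkeeping
+      // below; it is erased only at the bottom of the loop via
+      // DeferredElideWith.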
 
       // Otherwise, just remember that this value is available.
-      AvailableValues.insert(&Inst, &Inst);
-      continue;
+      if (!DeferredElideWith) {
+        AvailableValues.insert(
+            SimpleValue(&Inst, getStrictFPGeneration(&Inst)), &Inst);
+        continue;
+      }
+    } else {
+      if (Inst.getOpcode() == Instruction::Call ||
+          Inst.getOpcode() == Instruction::Invoke) {
+        if (StrictFPGeneration > 0)
+          ++StrictFPGeneration;
+      }
    }
 
    ParseMemoryInst MemInst(&Inst, TTI);
    // If this is a non-volatile load, process it.
-    if (MemInst.isValid() && MemInst.isLoad()) {
+    if (MemInst.isValid() && MemInst.isLoad() && !DeferredElideWith) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
 
@@ -1405,8 +1518,10 @@
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
-      if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
-        LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
+      if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration,
+                                       StrictFPGeneration)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: (curgen " << CurrentGeneration
+                          << " sfpgen " << StrictFPGeneration << ") " << Inst
                          << " to: " << *InVal.DefInst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
 
@@ -1442,7 +1557,7 @@
      LastStore = nullptr;
 
    // If this is a read-only call, process it.
-    if (CallValue::canHandle(&Inst)) {
+    if (CallValue::canHandle(&Inst) && !DeferredElideWith) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
 
@@ -1486,10 +1601,11 @@
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
-    if (MemInst.isValid() && MemInst.isStore()) {
+    if (MemInst.isValid() && MemInst.isStore() && !DeferredElideWith) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
-          InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
+          InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration,
+                                            StrictFPGeneration)) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
 
@@ -1518,6 +1634,8 @@
    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
+    // Note that constrained FP intrinsics can reach this point: they are
+    // handled above, but they must still be treated as memory writes here.
    if (Inst.mayWriteToMemory()) {
      ++CurrentGeneration;
 
@@ -1565,6 +1683,19 @@
        LastStore = nullptr;
      }
    }
+
+    if (DeferredElideWith) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE CSE Finally: " << Inst
+                        << " to: " << *DeferredElideWith << '\n');
+      if (auto *I = dyn_cast<Instruction>(DeferredElideWith))
+        I->andIRFlags(&Inst);
+      Inst.replaceAllUsesWith(DeferredElideWith);
+      salvageKnowledge(&Inst, &AC);
+      removeMSSA(Inst);
+      Inst.eraseFromParent();
+      Changed = true;
+      ++NumCSE;
+      DeferredElideWith = nullptr;
+    }
  }
 
  return Changed;
 
@@ -1580,10 +1711,11 @@
 
  bool Changed = false;
 
+  StrictFPGeneration = (isFunctionStrictFP ? 1 : 0);
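+  // Seeding sketch (an assumption consistent with getStrictFPGeneration()
+  // above): in a non-strictfp function the generation stays 0, so every
+  // constrained intrinsic in the default FP environment keys its hash-table
+  // entry with generation 0 and CSEs exactly like an ordinary FP
+  // instruction.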
  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
-      CurrentGeneration, DT.getRootNode(),
+      CurrentGeneration, StrictFPGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));
 
  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
 
@@ -1596,6 +1728,7 @@
 
    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();
+    StrictFPGeneration = NodeToProcess->strictFPGeneration();
 
    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
 
@@ -1609,7 +1742,8 @@
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
-                        child, child->begin(), child->end()));
+                        NodeToProcess->childStrictFPGeneration(), child,
+                        child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
 
@@ -1630,7 +1764,7 @@
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
 
-  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA,
+               F.hasFnAttribute(Attribute::StrictFP));
 
  if (!CSE.run())
    return PreservedAnalyses::all();
 
@@ -1674,7 +1808,7 @@
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
 
-    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA,
+                 F.hasFnAttribute(Attribute::StrictFP));
 
    return CSE.run();
  }
diff --git a/llvm/test/Transforms/EarlyCSE/maytrap-strictfp.ll b/llvm/test/Transforms/EarlyCSE/maytrap-strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/maytrap-strictfp.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+
+; Test use of constrained floating point intrinsics with the
+; "fpexcept.maytrap" exception behavior in a strictfp function.
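+;
+; The pattern exercised below (a sketch of the expectation): without an
+; intervening call the second constrained operation is elided and its uses
+; are rewritten,
+;   %1 = call double @llvm.experimental.constrained.fadd.f64(...) #0  ; kept
+;   %2 = call double @llvm.experimental.constrained.fadd.f64(...) #0  ; -> %1
+; while the *_split variants place a call to @arbitraryfunc() between the
+; two operations, which bumps the strictfp generation and blocks the merge.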
+
+define double @multiple_fadd(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fadd_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fadd_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fsub(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fsub(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fsub_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fsub_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fmul(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fmul(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fmul_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fmul_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fdiv(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fdiv(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fdiv_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fdiv_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_frem(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_frem(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_frem_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_frem_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define i32 @multiple_fptoui(double %a) #0 {
+; CHECK-LABEL: @multiple_fptoui(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define i32 @multiple_fptoui_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fptoui_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define double @multiple_uitofp(i32 %a) #0 {
+; CHECK-LABEL: @multiple_uitofp(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define double @multiple_uitofp_split(i32 %a) #0 {
+; CHECK-LABEL: @multiple_uitofp_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define i32 @multiple_fptosi(double %a) #0 {
+; CHECK-LABEL: @multiple_fptosi(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %2 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define i32 @multiple_fptosi_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fptosi_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A]], metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define double @multiple_sitofp(i32 %a) #0 {
+; CHECK-LABEL: @multiple_sitofp(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+  %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define double @multiple_sitofp_split(i32 %a) #0 {
+; CHECK-LABEL: @multiple_sitofp_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define i1 @multiple_fcmp(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmp(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmp_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmp_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmps(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmps(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmps_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmps_split(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  call void @arbitraryfunc() #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+attributes #0 = { strictfp }
+
+declare void @arbitraryfunc() #0
+declare double @foo.f64(double, double) #0
+declare i32 @bar.i32(i32, i32) #0
+
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+
+; Test use of constrained floating point intrinsics with a mix of
+; "fpexcept.ignore" and "fpexcept.strict" exception behavior.
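+;
+; Sketch of the expectation: in each pair below the "fpexcept.ignore" call
+; and the "fpexcept.strict" call compute the same value, but the strict call
+; is not a CSE candidate at all (SimpleValue::canHandle rejects ebStrict),
+; so both calls must survive in every function.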
+
+define double @multiple_fadd(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fsub(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fsub(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fmul(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fmul(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_fdiv(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fdiv(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define double @multiple_frem(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_frem(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %2) #0
+  ret double %2
+}
+
+define i32 @multiple_fptoui(double %a) #0 {
+; CHECK-LABEL: @multiple_fptoui(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A]], metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.ignore") #0
+  %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define double @multiple_uitofp(i32 %a) #0 {
+; CHECK-LABEL: @multiple_uitofp(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define i32 @multiple_fptosi(double %a) #0 {
+; CHECK-LABEL: @multiple_fptosi(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A]], metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.ignore") #0
+  %2 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") #0
+  %3 = call i32 @bar.i32(i32 %1, i32 %1) #0
+  ret i32 %2
+}
+
+define double @multiple_sitofp(i32 %a) #0 {
+; CHECK-LABEL: @multiple_sitofp(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+  %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %3 = call double @foo.f64(double %1, double %1) #0
+  ret double %2
+}
+
+define i1 @multiple_fcmp(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmp(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmps(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmps(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+attributes #0 = { strictfp }
+
+declare void @arbitraryfunc() #0
+declare double @foo.f64(double, double) #0
+declare i32 @bar.i32(i32, i32) #0
+
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/round-dyn-strictfp.ll b/llvm/test/Transforms/EarlyCSE/round-dyn-strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/round-dyn-strictfp.ll
@@ -0,0 +1,329 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+
+; Test use of constrained floating point intrinsics with the dynamic
+; rounding mode ("round.dynamic") in a strictfp function.
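+;
+; Sketch of the expectation: with !"round.dynamic" the result depends on the
+; current rounding mode, which an arbitrary call may change, so the *_split
+; variants must keep both operations. Conversions such as fptoui take no
+; rounding-mode operand, so with !"fpexcept.ignore" they remain in the
+; default FP environment and may be merged even across a call.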
%b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_fsub_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fsub_split( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A]], double [[B]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_fmul(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fmul( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] +; + %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_fmul_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fmul_split( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A]], double [[B]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_fdiv(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fdiv( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] +; + %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %2 = call double 
@llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_fdiv_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fdiv_split( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[B]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_frem(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_frem( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] +; + %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define double @multiple_frem_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_frem_split( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A]], double [[B]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %2) #0 + ret double %2 +} + +define i32 @multiple_fptoui(double %a) #0 { +; CHECK-LABEL: @multiple_fptoui( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.ignore") #0 + %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, 
metadata !"fpexcept.ignore") #0 + %3 = call i32 @bar.i32(i32 %1, i32 %1) #0 + ret i32 %2 +} + +define i32 @multiple_fptoui_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fptoui_split( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.ignore") #0 + %3 = call i32 @bar.i32(i32 %1, i32 %1) #0 + ret i32 %2 +} + +define double @multiple_uitofp(i32 %a) #0 { +; CHECK-LABEL: @multiple_uitofp( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] +; + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %1) #0 + ret double %2 +} + +define double @multiple_uitofp_split(i32 %a) #0 { +; CHECK-LABEL: @multiple_uitofp_split( +; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @arbitraryfunc() #0 + %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %3 = call double @foo.f64(double %1, double %1) #0 + ret double %2 +} + +define i32 @multiple_fptosi(double %a) #0 { +; CHECK-LABEL: @multiple_fptosi( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.ignore") #0 + %2 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.ignore") #0 + %3 = call i32 @bar.i32(i32 %1, i32 %1) #0 + ret i32 %2 +} + +define i32 @multiple_fptosi_split(double %a, double %b) #0 { +; CHECK-LABEL: @multiple_fptosi_split( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: call void @arbitraryfunc() #[[ATTR0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 
+define i1 @multiple_fcmp(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmp(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmp_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmp_split(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  call void @arbitraryfunc() #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
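+; Note (editorial comment): constrained fcmps is the signaling comparison; it
+; raises the invalid exception on any NaN operand, quiet or signaling.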
+define i1 @multiple_fcmps(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmps(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+define i1 @multiple_fcmps_split(double %a, double %b) #0 {
+; CHECK-LABEL: @multiple_fcmps_split(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    call void @arbitraryfunc() #[[ATTR0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  call void @arbitraryfunc() #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %3 = zext i1 %1 to i32
+  %4 = zext i1 %2 to i32
+  %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
+  ret i1 %2
+}
+
+attributes #0 = { strictfp }
+
+declare void @arbitraryfunc() #0
+declare double @foo.f64(double, double) #0
+declare i32 @bar.i32(i32, i32) #0
+
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/tfpropagation.ll b/llvm/test/Transforms/EarlyCSE/tfpropagation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/tfpropagation.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
+; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+
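+; Note (editorial comment): these tests check that the known value of a branch
+; condition (true on the taken edge, false on the other) is propagated into the
+; successor blocks, for integer, plain FP, and non-trapping constrained-FP
+; compares.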
+define i64 @branching_int(i32 %a) {
+; CHECK-LABEL: @branching_int(
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i64 1, [[CONV1]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[C:%.*]] = call double @truefunc.f64.i1(i1 true)
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       if.end3:
+; CHECK-NEXT:    [[D:%.*]] = call double @falsefunc.f64.i1(i1 false)
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret i64 [[CONV1]]
+;
+  %conv1 = zext i32 %a to i64
+  %cmp2 = icmp ugt i64 1, %conv1
+  br i1 %cmp2, label %if.then3, label %if.end3
+
+if.then3:
+  %c = call double @truefunc.f64.i1(i1 %cmp2)
+  br label %out
+
+if.end3:
+  %d = call double @falsefunc.f64.i1(i1 %cmp2)
+  br label %out
+
+out:
+  ret i64 %conv1
+}
+
+define double @branching_fp(i64 %a) {
+; CHECK-LABEL: @branching_fp(
+; CHECK-NEXT:    [[CONV1:%.*]] = uitofp i64 [[A:%.*]] to double
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt double 1.000000e+00, [[CONV1]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[C:%.*]] = call double @truefunc.f64.i1(i1 true)
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       if.end3:
+; CHECK-NEXT:    [[D:%.*]] = call double @falsefunc.f64.i1(i1 false)
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret double [[CONV1]]
+;
+  %conv1 = uitofp i64 %a to double
+  %cmp2 = fcmp ogt double 1.000000e+00, %conv1
+  br i1 %cmp2, label %if.then3, label %if.end3
+
+if.then3:
+  %c = call double @truefunc.f64.i1(i1 %cmp2)
+  br label %out
+
+if.end3:
+  %d = call double @falsefunc.f64.i1(i1 %cmp2)
+  br label %out
+
+out:
+  ret double %conv1
+}
+
+define double @branching_exceptignore(i64 %a) #0 {
+; CHECK-LABEL: @branching_exceptignore(
+; CHECK-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    [[CMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double [[CONV1]], metadata !"ogt", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[C:%.*]] = call double @truefunc.f64.i1(i1 true) #[[ATTR0]]
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       if.end3:
+; CHECK-NEXT:    [[D:%.*]] = call double @falsefunc.f64.i1(i1 false) #[[ATTR0]]
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret double [[CONV1]]
+;
+  %conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double %conv1, metadata !"ogt", metadata !"fpexcept.ignore") #0
+  br i1 %cmp2, label %if.then3, label %if.end3
+
+if.then3:
+  %c = call double @truefunc.f64.i1(i1 %cmp2) #0
+  br label %out
+
+if.end3:
+  %d = call double @falsefunc.f64.i1(i1 %cmp2) #0
+  br label %out
+
+out:
+  ret double %conv1
+}
+
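+; Note (editorial comment): same as @branching_exceptignore, but with
+; fpexcept.maytrap. The compare may raise an exception, yet its result is still
+; known on each edge, so the constant is still expected to be propagated.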
+define double @branching_maytrap(i64 %a) #0 {
+; CHECK-LABEL: @branching_maytrap(
+; CHECK-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double [[CONV1]], metadata !"ogt", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[C:%.*]] = call double @truefunc.f64.i1(i1 true) #[[ATTR0]]
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       if.end3:
+; CHECK-NEXT:    [[D:%.*]] = call double @falsefunc.f64.i1(i1 false) #[[ATTR0]]
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret double [[CONV1]]
+;
+  %conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
+  %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double %conv1, metadata !"ogt", metadata !"fpexcept.maytrap") #0
+  br i1 %cmp2, label %if.then3, label %if.end3
+
+if.then3:
+  %c = call double @truefunc.f64.i1(i1 %cmp2) #0
+  br label %out
+
+if.end3:
+  %d = call double @falsefunc.f64.i1(i1 %cmp2) #0
+  br label %out
+
+out:
+  ret double %conv1
+}
+
+declare double @truefunc.f64.i1(i1)
+declare double @falsefunc.f64.i1(i1)
+declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) #0
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) #0
+
+attributes #0 = { strictfp }
+
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) strictfp