Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -120,6 +120,7 @@ void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); +void initializeFencesPREPass(PassRegistry&); void initializeGCOVProfilerPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -16,17 +16,32 @@ #define LLVM_TRANSFORMS_SCALAR_H #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Intrinsics.h" namespace llvm { class BasicBlockPass; class FunctionPass; -class Pass; class GetElementPtrInst; +class Instruction; +class Pass; class PassInfo; class TerminatorInst; class TargetLowering; class TargetMachine; +class Value; + +//===----------------------------------------------------------------------===// +// +// FencesPRE - Elimination of target-specific fences based on a PRE algorithm +// +FunctionPass *createFencesPREPass( + Intrinsic::ID FenceInt = Intrinsic::not_intrinsic, + ArrayRef FenceArgs = {}, + std::function isStrongerFence + = [](const Instruction &I){return false;}, + std::function isWeakerFence + = [](const Instruction &I){return false;}); //===----------------------------------------------------------------------===// // Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -14,7 +14,10 @@ #include "PPCTargetMachine.h" #include "PPC.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -155,6 +158,28 @@ void PPCPassConfig::addIRPasses() { addPass(createAtomicExpandPass(&getPPCTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) { + // We first try to eliminate redundant hwsync instructions, ignoring lwsyncs. + addPass(createFencesPREPass(Intrinsic::ppc_sync, {}, + /*IsStrongerFence=*/[](const Instruction &I) {return false;}, + /*IsWeakerFence=*/[](const Instruction &I) { + if (auto FI = dyn_cast(&I)) + if (FI->getIntrinsicID() == Intrinsic::ppc_lwsync) + return true; + return false;})); + // Then we eliminate redundant lwsyncs, taking into account the hwsyncs. + addPass(createFencesPREPass(Intrinsic::ppc_lwsync, {}, + /*IsStrongerFence=*/[](const Instruction &I) { + if (auto FI = dyn_cast(&I)) + if (FI->getIntrinsicID() == Intrinsic::ppc_sync) + return true; + return false;}, + /*IsWeakerFence=*/[](const Instruction &I) {return false;})); + // These two passes require that critical edges be split. So we should + // cleanup the CFG afterwards. + // FIXME: breaks a bunch of brittle tests + // addPass(createCFGSimplificationPass()); + } TargetPassConfig::addIRPasses(); } Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -7,6 +7,7 @@ DCE.cpp DeadStoreElimination.cpp EarlyCSE.cpp + FencesPRE.cpp FlattenCFGPass.cpp GVN.cpp IndVarSimplify.cpp Index: lib/Transforms/Scalar/FencesPRE.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/FencesPRE.cpp @@ -0,0 +1,521 @@ +//===- FencesPRE.cpp - Fence Elimination Implementation -=====================// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates target-specific fence intrinsics that are redundant, or +// partially redundant if it can introduce new fences to make them fully +// redundant. +// +// The algorithm is based on the following PRE paper, adapted to work on fences: +// Bernhard Scholz, R. Nigel Horspool, Jens Knoop: +// Optimizing for space and time usage with speculative partial redundancy +// elimination. +// LCTES 2004: 221-230 +// +// The general idea is to build a graph based on the CFG, with a node for each +// instruction that may affect memory, and a node for each join/split in the +// CFG, label this graph with the cost for edge of having a fence on that edge +// (currently using BlockFrequencyInfo). Then each memory-affecting instruction +// immediately before a fence is marked as a source, each one immediately after +// a fence is marked as a sink, and a min-cut provides us a minimum-cost set of +// fences that provide the same ordering guarantees. +// There are a few subtleties: for example some nodes must be duplicated to +// prevent them from being both a source and a sink; and the graph is only +// built around the fences (to avoid the cost of allocating a potentially huge +// graph). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "fences-pre" + +STATISTIC(NumFencesDetected, "Number of fences pre-FencesPRE"); +STATISTIC(NumFencesInserted, "Number of fences added by FencesPRE"); +STATISTIC(NumFencesDeleted, "Number of fences erased by FencesPRE"); + +namespace { + class FencesPRE : public FunctionPass { + public: + static char ID; + FencesPRE(Intrinsic::ID FenceInt = Intrinsic::not_intrinsic, + ArrayRef Args = {}, + // Stronger fences can make the fence under consideration + // redundant. + std::function IsStrongerFence + = [](const Instruction &I){return false;}, + // Weaker fences are ignored by the algorithm. + std::function IsWeakerFence + = [](const Instruction &I){return false;}) : + FunctionPass(ID), FenceIntrinsicID(FenceInt), FenceArgs(Args), + isStrongerFence(IsStrongerFence), isWeakerFence(IsWeakerFence) { + initializeFencesPREPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequired(); + } + private: + Intrinsic::ID FenceIntrinsicID; + ArrayRef FenceArgs; + std::function isStrongerFence; + std::function isWeakerFence; + BlockFrequencyInfo *BFI; + + // signed so that we can also represent flow. + // Capacity of edges is (BlockFrequency >> 2) + 1 + // (shifted to fit in 63 bits, +1 to account for code size increases). + typedef int64_t Capacity; + typedef uint32_t Height; + + typedef struct FlowGraphNode { + Instruction * I; + // nullptr unless there is exactly one edge leading to this node, and a + // fence exist on this edge. Used to keep existing fences when possible, + // instead of erasing them and inserting a new one at the same spot. + Instruction * FenceBeforeIt; + Capacity Excess; + Height Height; + // This bit is used in the algorithm at the end to extract the min-cut + // from the max-flow. + bool Mark; + std::vector > Succs; + std::vector >::iterator Next; + // Source nodes must be separate, or a single node could be source and + // sink, which causes havoc. Terminator nodes are also duplicated to + // avoid trouble when a basic block contains just a terminator node. + // This is a unique_ptr because the node must be freed at the end, and + // is not in the FlowGraph map (since the non-duplicate already is). + std::unique_ptr DupNode; + FlowGraphNode() : I(nullptr), FenceBeforeIt(nullptr), Excess(0), + Height(0), Mark(false), Succs(), Next(Succs.begin()), + DupNode() {} + FlowGraphNode(Instruction * Inst) : I(Inst), FenceBeforeIt(nullptr), + Excess(0), Height(0), Mark(false), + Succs(), Next(Succs.begin()), + DupNode() {} + } FlowGraphNode; + + typedef std::pair Edge; + typedef DenseMap > FlowGraph; + typedef SmallPtrSetImpl NodeSubset; + typedef SmallVectorImpl Cut; + typedef DenseMap Flow; + typedef std::vector > Buckets; + + FlowGraphNode *makeGraphUpwards(const Instruction &FI, Instruction &Root, + FlowGraph &FlowGraph, NodeSubset &Sources) const; + FlowGraphNode *makeGraphDownwards(const Instruction &FI, Instruction &Root, + FlowGraph &FlowGraph, NodeSubset &Sources) const; + void findCut(const FlowGraph &FlowGraph, const NodeSubset &Sources, + const NodeSubset &Sinks, Cut &Cut) const; + void insertFenceBefore(Instruction &I) const; + bool isStop(const Instruction &I) const; + bool isWantedFence(const Instruction &I) const; + FlowGraphNode *getNode(FlowGraph &FlowGraph, Instruction &I, + bool shouldDuplicate=false) const; + void addEdge(FlowGraphNode *N1, FlowGraphNode *N2) const; + Capacity getResidualCapacity(Flow &Flow, Capacity C, FlowGraphNode &U, + FlowGraphNode &V) const; + bool push(Buckets &B, Flow &Flow, const NodeSubset &S, const NodeSubset &T, + Capacity C, FlowGraphNode &U, FlowGraphNode &V) const; + void relabel(Flow &Flow, FlowGraphNode &U) const; + Height discharge(Buckets &B, Flow &Flow, const NodeSubset &S, const NodeSubset &T, + FlowGraphNode &U) const; + void markNode(Flow &Flow, Cut &Cut, FlowGraphNode &U) const; + }; +} + +char FencesPRE::ID = 0; +INITIALIZE_PASS_BEGIN(FencesPRE, "fences-pre", + "Partial redundancy elimination for fences", + false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) +INITIALIZE_PASS_END(FencesPRE, "fences-pre", + "Partial redundancy elimination for fences", + false, false) +FunctionPass *llvm:: +createFencesPREPass(Intrinsic::ID FenceInt, ArrayRef FenceArgs, + std::function isStrongerFence, + std::function isWeakerFence) { + return new FencesPRE(FenceInt, FenceArgs, isStrongerFence, isWeakerFence); +} + +bool FencesPRE::runOnFunction(Function &F) { + if (!FenceIntrinsicID) + return false; + BFI = &getAnalysis(); + + // This FlowGraph ensures that the entire graph is freed at the end of the + // function. + DenseMap > FlowGraph; + SmallPtrSet Sources; + SmallPtrSet Sinks; + SmallVector Cut; + SmallPtrSet Fences; + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (isWantedFence(*I)) { + Fences.insert(&*I); + ++NumFencesDetected; + FlowGraphNode *UpNode = makeGraphUpwards(*I, *I, FlowGraph, Sources); + FlowGraphNode *DownNode = makeGraphDownwards(*I, *I, FlowGraph, Sinks); + // These nodes can be null if there is a stronger fence just + // before/after this one + if (UpNode && DownNode) { + DownNode->FenceBeforeIt = &*I; + addEdge(UpNode, DownNode); + } + } + } + if (Fences.empty()) + return false; + + DEBUG(dbgs() << "Sources:\n"; + for (auto S : Sources) { + dbgs() << "\t" << S << " : "; S->I->dump(); + }); + DEBUG(dbgs() << "Sinks:\n"; + for (auto T : Sinks) { + dbgs() << "\t" << T << " : "; T->I->dump(); + }); + + findCut(FlowGraph, Sources, Sinks, Cut); + + for (auto NPair : Cut) { + assert(NPair.first->I->getParent() == NPair.second->I->getParent()); + DEBUG(dbgs() << "Pair in the cut:" << NPair.first << " - " << + NPair.second << "\n"; + dbgs() << "\t"; NPair.first->I->dump(); + dbgs() << "\t"; NPair.second->I->dump()); + // If a fence is already there, just mark it as not for deletion. + Instruction *FenceBetweenNPair = NPair.second->FenceBeforeIt; + if (FenceBetweenNPair != nullptr) { + DEBUG(dbgs() << "preserving fence " << FenceBetweenNPair << "\n"); + Fences.erase(FenceBetweenNPair); + continue; + } + // Otherwise, insert one. + insertFenceBefore(*(NPair.second->I)); + ++NumFencesInserted; + } + // And then erase any fence that is still marked for deletion. + for (auto FI : Fences) { + FI->eraseFromParent(); + ++NumFencesDeleted; + } + + return !Fences.empty(); +} + +bool FencesPRE::isWantedFence(const Instruction &I) const { + auto FI = dyn_cast(&I); + if (!FI) + return false; + if (FI->getIntrinsicID() != FenceIntrinsicID) + return false; + unsigned NumArgs = FI->getNumArgOperands(); + if (NumArgs != FenceArgs.size()) + return false; + for (unsigned i = 0; i < NumArgs; ++i) { + // Args for CallInst are enumerated from 1. + if (FenceArgs[i] != FI->getArgOperand(i+1)) + return false; + } + + return true; +} + +FencesPRE::FlowGraphNode * +FencesPRE::getNode(FlowGraph &FlowGraph, Instruction &I, bool shouldDuplicate) const { + // Does nothing if already present. + // FIXME: this does one allocation every time. + FlowGraph.insert(make_pair(&I, std::unique_ptr(new FlowGraphNode(&I)))); + auto Node = FlowGraph[&I].get(); + if (shouldDuplicate) { + if(!Node->DupNode) { + Node->DupNode = std::unique_ptr(new FlowGraphNode(&I)); + } + Node = Node->DupNode.get(); + } + return Node; +} + +void FencesPRE::addEdge(FlowGraphNode *N1, FlowGraphNode *N2) const { + if (N1 == nullptr || N2 == nullptr) + return; + + auto BB1 = N1->I->getParent(), BB2 = N2->I->getParent(); + Capacity Capacity; + if (BB1 != BB2) + Capacity = (UINT64_MAX >> 1); + // Plus one, because it should never be 0, because of code size, inserting a + // fence is never completely free, even if the code is only very rarely + // executed + else + Capacity = (BFI->getBlockFreq(BB1).getFrequency() >> 2) + 1; + + N1->Succs.push_back(std::make_pair(N2, Capacity)); + // This may invalidate iterators, so we reset N->Next; + N1->Next = N1->Succs.begin(); + // We also insert the reverse edge so that excess preflow can flow backward. + N1->Succs.push_back(std::make_pair(N2, 0)); + N2->Next = N2->Succs.begin(); + DEBUG(dbgs() << "addEdge (" << Capacity << "): "<< N1 << " -> " << N2 << "\n"; + dbgs() << "\t"; N1->I->dump(); + dbgs() << "\t"; N2->I->dump();); +} + +// FIXME: The first argument is currently unused, will be useful once metadata +// is attached to fences by AtomicExpand. +FencesPRE::FlowGraphNode *FencesPRE:: +makeGraphUpwards(const Instruction &FI, Instruction &Root, + FlowGraph &FlowGraph, NodeSubset &Sources) const { + auto BB = Root.getParent(); + DEBUG(dbgs() << "makeGraphUpwards from " << &Root << " : "; + Root.dump()); + + // Get a reverse iterator at the Root instruction + BasicBlock::reverse_iterator it = BB->rbegin(); + for (; &*it != &Root ; ++it) {} + + for (auto b = BB->rend(); it != b; ++it) { + if (isStrongerFence(*it)) + return nullptr; + if (!isStop(*it)) + continue; + + auto Node = getNode(FlowGraph, *it, true); + Sources.insert(Node); + return Node; + } + + // We have reached the beginning of the basic block without finding any + // reason to stop. Let's continue in the predecessors. + auto InsertPt = BB->getFirstInsertionPt(); + auto Node = getNode(FlowGraph, *InsertPt); + // But first, we should check whether we are at the beginning of the function. + if (&BB->getParent()->getEntryBlock() == BB) { + Sources.insert(Node); + return Node; + } + for (auto PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + // FIXME: add a conservative option where we do not go forward if there is + // any of our predecessor that we do not post-dominate. + BasicBlock *BBP = *PI; + Instruction *TermInst = BBP->getTerminator(); + auto TNode = getNode(FlowGraph, *TermInst, true); + auto UpNode = makeGraphUpwards(FI, *TermInst, FlowGraph, Sources); + if (UpNode) { + addEdge(TNode, Node); + addEdge(UpNode, TNode); + } + } + return Node; +} + +// FIXME: reduce duplication with makeGraphUpwards +FencesPRE::FlowGraphNode *FencesPRE:: +makeGraphDownwards(const Instruction &FI, Instruction &Root, + FlowGraph &FlowGraph, NodeSubset &Sinks) const { + auto BB = Root.getParent(); + DEBUG(dbgs() << "makeGraphDownwards from " << &Root << " : "; + Root.dump()); + + // Get an iterator at the Root instruction + BasicBlock::iterator it = BB->begin(); + for (; &*it != &Root ; ++it) {} + + for (auto b = BB->end(); it != b; ++it) { + if (isStrongerFence(*it)) + return nullptr; + if (!isStop(*it)) + continue; + + auto Node = getNode(FlowGraph, *it); + Sinks.insert(Node); + return Node; + } + + // We have reached the end of the basic block without finding any + // reason to stop. Let's continue in the successors. + auto TermInst = BB->getTerminator(); + auto Node = getNode(FlowGraph, *TermInst, true); + for (auto PI = succ_begin(BB), E = succ_end(BB); PI != E; ++PI) { + // FIXME: add a conservative option where we do not go forward if there is + // any of our predecessor that we do not post-dominate. + BasicBlock *BBS = *PI; + Instruction *InsertPt = BBS->getFirstInsertionPt(); + auto INode = getNode(FlowGraph, *InsertPt); + auto DownNode = makeGraphDownwards(FI, *InsertPt, FlowGraph, Sinks); + if (DownNode) { + addEdge(Node, INode); + addEdge(INode, DownNode); + } + } + return Node; +} + +bool FencesPRE::isStop(const Instruction &I) const { + return (I.mayReadOrWriteMemory() && + !isWantedFence(I) && + !isWeakerFence(I)) || + isa(I); +} + +void FencesPRE::insertFenceBefore(Instruction &I) const { + IRBuilder<> Builder(&I); + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *Fence = Intrinsic::getDeclaration(M, FenceIntrinsicID); + Builder.CreateCall(Fence, FenceArgs); +} + +/* ----- Implementation of a basic push-relabel min-cut algorithm ----- */ + +FencesPRE::Capacity FencesPRE:: +getResidualCapacity(Flow &Flow, Capacity C, + FlowGraphNode &U, FlowGraphNode &V) const { + return C - Flow[std::make_pair(&U, &V)]; +} + + +bool FencesPRE:: +push(Buckets &B, Flow &Flow, const NodeSubset &S, const NodeSubset &T, + Capacity C, FlowGraphNode &U, FlowGraphNode &V) const { + auto amount = std::min(U.Excess, getResidualCapacity(Flow, C, U, V)); + if (amount <= 0) + return false; + Flow[std::make_pair(&U, &V)] += amount; + Flow[std::make_pair(&V, &U)] -= amount; + U.Excess -= amount; + V.Excess += amount; + DEBUG(dbgs() << "\t push " << amount << " : " << &U << " -> " << &V << "\n"); + if (S.count(&V) || T.count(&V)) + return true; + if (B.size() <= V.Height) + B.resize(V.Height + 1); + // FIXME: it should probably be a set to remove duplicates for efficiency + B[V.Height].push_back(&V); + return true; +} + +void FencesPRE::relabel(Flow &Flow, FlowGraphNode &U) const { + auto minHeight = UINT32_MAX; + for (auto &VWithCap : U.Succs) { + auto &V = *VWithCap.first; + auto C = VWithCap.second; + if (getResidualCapacity(Flow, C, U, V) > 0) + minHeight = std::min(minHeight, V.Height+1); + } + DEBUG(dbgs() << "\trelabel " << &U << " from " << U.Height + << " to " << minHeight << "\n"); + assert(minHeight != UINT32_MAX); + U.Height = minHeight; +} + +FencesPRE::Height FencesPRE:: +discharge(Buckets &B, Flow &Flow, const NodeSubset &S, + const NodeSubset &T, FlowGraphNode &U) const { + Height MaxHeight = 0; + while (U.Excess > 0) { + if (U.Next != U.Succs.end()) { + auto VWithCap = *U.Next; + auto &V = *VWithCap.first; + auto C = VWithCap.second; + if (U.Height > V.Height && push(B, Flow, S, T, C, U, V)) + MaxHeight = std::max(MaxHeight, V.Height); + else + ++U.Next; + } else { + relabel(Flow, U); + U.Next = U.Succs.begin(); + } + } + return MaxHeight; +} + +void FencesPRE::markNode(Flow &Flow, Cut &Cut, FlowGraphNode &U) const { + if (U.Mark) + return; + U.Mark = true; + DEBUG(dbgs() << "Marking node " << &U << " : "; U.I->dump()); + for (auto &VWithCap : U.Succs) + if (getResidualCapacity(Flow, VWithCap.second, U, *VWithCap.first) > 0) + markNode(Flow, Cut, *VWithCap.first); + else + Cut.push_back(std::make_pair(&U, VWithCap.first)); +} + +void FencesPRE::findCut(const FlowGraph &FlowGraph, const NodeSubset &Sources, + const NodeSubset &Sinks, Cut &Cut) const { + Buckets Buckets; + // The flow is a separate map and not part of the graph, because when pushing + // along an edge U->V we must also update Flow[V->U] and not just Flow[U->V]. + Flow Flow; + + // First we must make all of the Succs vectors into actual maps. + for (auto &InstToNode : FlowGraph) { + auto N = InstToNode.second.get(); + do { + std::sort(N->Succs.begin(), N->Succs.end()); + std::unique(N->Succs.begin(), N->Succs.end()); + } while ((N = N->DupNode.get())); + } + + // Initialization of the sources + for (auto S : Sources) { + S->Excess = INT64_MAX; + S->Height = FlowGraph.size(); + for (auto &VWithCap : S->Succs) + push(Buckets, Flow, Sources, Sinks, VWithCap.second, *S, *VWithCap.first); + } + + // We are in a trivial case with no more work to do. + if (Buckets.size() == 0) { + for (auto N : Sources) + markNode(Flow, Cut, *N); + return; + } + + // signed int so we can detect when it hits -1 + int CurrentActiveHeight = 0; + while (CurrentActiveHeight >= 0) { + if (Buckets[CurrentActiveHeight].empty()) { + --CurrentActiveHeight; + continue; + } + FlowGraphNode &N = *Buckets[CurrentActiveHeight].back(); + Buckets[CurrentActiveHeight].pop_back(); + + DEBUG(dbgs() << "Discharging node " << &N << " : "; N.I->dump()); + Height NewMaxHeight = discharge(Buckets, Flow, Sources, Sinks, N); + CurrentActiveHeight = std::max(CurrentActiveHeight, (int)NewMaxHeight); + } + // We are done, just produce the cut now; + for (auto N : Sources) + markNode(Flow, Cut, *N); +} Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -35,6 +35,7 @@ initializeCorrelatedValuePropagationPass(Registry); initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); + initializeFencesPREPass(Registry); initializeScalarizerPass(Registry); initializeDSEPass(Registry); initializeGVNPass(Registry); Index: test/CodeGen/PowerPC/atomics-fences-pre.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/atomics-fences-pre.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 +; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction). +; This is already checked for in Atomics-64.ll +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64 + +; sync 1 + sync 1 -> sync 1 +define i32 @acquire_release(i32* %mem) { +; CHECK-LABEL: acquire_release +; CHECK: lwz +; CHECK: sync 1 +; CHECK-NOT: sync +; CHECK: stw + %val = load atomic i32* %mem acquire, align 4 + store atomic i32 42, i32* %mem release, align 4 + ret i32 %val +} + +; sync 0 + sync 1 -> sync 0 +define i32 @acquire_seq_cst(i32* %mem) { +; CHECK-LABEL: acquire_seq_cst +; CHECK: lwz +; CHECK: sync 0 +; CHECK-NOT: sync +; CHECK: stw + %val = load atomic i32* %mem acquire, align 4 + store atomic i32 42, i32* %mem seq_cst, align 4 + ret i32 %val +} + +; The insertion of a fence after the monotonic store allows the elimination of +; the fence before the release store. +define void @basic-pre(i32* %mem, i1 %cond) { +; CHECK-LABEL: basic-pre +entry: +; CHECK-LABEL: BB#0 +; CHECK: lwz +; CHECK: sync 1 +; CHECK-NOT: sync +; CHECK: b + %val1 = load atomic i32* %mem acquire, align 4 + br i1 %cond, label %branch, label %exit + +branch: +; CHECK-LABEL: BB#1 +; CHECK-NOT: sync +; CHECK: stw +; CHECK: sync 1 + store atomic i32 21, i32* %mem monotonic, align 4 + br label %exit + +exit: +; CHECK-LABEL: LBB2 +; CHECK-NOT: sync +; CHECK: stw + store atomic i32 42, i32* %mem release, align 4 + ret void +}