Index: include/llvm/Transforms/Scalar/SROA.h =================================================================== --- include/llvm/Transforms/Scalar/SROA.h +++ include/llvm/Transforms/Scalar/SROA.h @@ -26,6 +26,8 @@ namespace llvm { +class ParallelRegionInfo; + /// A private "module" namespace for types and utilities used by SROA. These /// are implementation details and should not be used by clients. namespace sroa LLVM_LIBRARY_VISIBILITY { @@ -59,6 +61,7 @@ LLVMContext *C = nullptr; DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; + ParallelRegionInfo *PRI = nullptr; /// \brief Worklist of alloca instructions to simplify. /// @@ -114,7 +117,7 @@ /// Helper used by both the public run method and by the legacy pass. PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC); + AssumptionCache &RunAC, ParallelRegionInfo &RunPRI); bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, Index: include/llvm/Transforms/Utils/PromoteMemToReg.h =================================================================== --- include/llvm/Transforms/Utils/PromoteMemToReg.h +++ include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -22,6 +22,7 @@ class DominatorTree; class AliasSetTracker; class AssumptionCache; +class ParallelRegionInfo; /// \brief Return true if this alloca is legal for promotion. /// @@ -29,7 +30,12 @@ /// (transitively) using this alloca. This also enforces that there is only /// ever one layer of bitcasts or GEPs between the alloca and the lifetime /// markers. -bool isAllocaPromotable(const AllocaInst *AI); +/// +/// In case the dominator tree and the parallel region info are _both_ given +/// we also verify that promoting the alloca does not break invariants of the +/// parallel regions. See ParallelRegionInfo::isSafeToPromote(*) for details. 
+bool isAllocaPromotable(const AllocaInst *AI, const DominatorTree *DT = nullptr, + const ParallelRegionInfo *PRI = nullptr); /// \brief Promote the specified list of alloca instructions into scalar /// registers, inserting PHI nodes as appropriate. Index: lib/Analysis/ParallelRegionInfo.cpp =================================================================== --- lib/Analysis/ParallelRegionInfo.cpp +++ lib/Analysis/ParallelRegionInfo.cpp @@ -468,6 +468,8 @@ bool ParallelRegionInfo::isSafeToPromote(const AllocaInst &AI, const DominatorTree &DT) const { + if (TopLevelParallelRegions.empty()) + return true; // First check if we know that AI is contained in a parallel region. ParallelRegion *AIPR = nullptr; Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ParallelRegionInfo.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -4082,6 +4083,13 @@ AI.eraseFromParent(); return true; } + + // For now we do not perform SROA if it might break a parallel region. + // TODO Perform this check on the slices and promote the ones that are safe to + // promote. + if (!PRI->isSafeToPromote(AI, *DT)) + return false; + const DataLayout &DL = AI.getModule()->getDataLayout(); // Skip alloca forms that this analysis can't handle. 
@@ -4191,11 +4199,13 @@ } PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC) { + AssumptionCache &RunAC, + ParallelRegionInfo &RunPRI) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DT = &RunDT; AC = &RunAC; + PRI = &RunPRI; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); @@ -4243,7 +4253,8 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { return runImpl(F, AM.getResult(F), - AM.getResult(F)); + AM.getResult(F), + AM.getResult(F)); } /// A legacy pass for the legacy pass manager that wraps the \c SROA pass. @@ -4264,12 +4275,14 @@ auto PA = Impl.runImpl( F, getAnalysis().getDomTree(), - getAnalysis().getAssumptionCache(F)); + getAnalysis().getAssumptionCache(F), + getAnalysis().getParallelRegionInfo()); return !PA.areAllPreserved(); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.setPreservesCFG(); } @@ -4286,5 +4299,6 @@ "Scalar Replacement Of Aggregates", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ParallelRegionInfoPass) INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) Index: lib/Transforms/Utils/Mem2Reg.cpp =================================================================== --- lib/Transforms/Utils/Mem2Reg.cpp +++ lib/Transforms/Utils/Mem2Reg.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ParallelRegionInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -28,7 +29,8 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, - 
AssumptionCache &AC) { + AssumptionCache &AC, + ParallelRegionInfo &PRI) { std::vector Allocas; BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; @@ -40,7 +42,7 @@ // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (isAllocaPromotable(AI, &DT, &PRI)) Allocas.push_back(AI); if (Allocas.empty()) @@ -56,7 +58,8 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); - if (!promoteMemoryToRegister(F, DT, AC)) + auto &PRI = AM.getResult(F); + if (!promoteMemoryToRegister(F, DT, AC, PRI)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -81,12 +84,15 @@ DominatorTree &DT = getAnalysis().getDomTree(); AssumptionCache &AC = getAnalysis().getAssumptionCache(F); - return promoteMemoryToRegister(F, DT, AC); + ParallelRegionInfo &PRI = + getAnalysis().getParallelRegionInfo(); + return promoteMemoryToRegister(F, DT, AC, PRI); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.setPreservesCFG(); } }; @@ -98,6 +104,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ParallelRegionInfoPass) INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) Index: lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ParallelRegionInfo.h" #include "llvm/IR/CFG.h" #include 
"llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -48,7 +49,13 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { +bool llvm::isAllocaPromotable(const AllocaInst *AI, const DominatorTree *DT, + const ParallelRegionInfo *PRI) { + // If the dominator tree and parallel region info were given we check if the + // promotion would break any parallel region requirements. + if (PRI && DT && !PRI->isSafeToPromote(*AI, *DT)) + return false; + // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); Index: test/PIR/alloca_promotion.ll =================================================================== --- /dev/null +++ test/PIR/alloca_promotion.ll @@ -0,0 +1,77 @@ +; RUN: opt -mem2reg -sroa -S < %s | FileCheck %s + +declare void @foo(); +declare void @bar(i32); + +; Verify we do not promote allocas that are used inside and outside a +; parallel region. +define i32 @alloca_used_in_seq_and_par_code() { +entry: +; CHECK: %local_alloca = alloca i32 + %local_alloca = alloca i32 + fork label %forked, %cont + +forked: ; preds = %entry +; CHECK: store i32 0, i32* %local_alloca + store i32 0, i32* %local_alloca + call void @foo() + halt label %cont + +cont: ; preds = %entry, %forked + call void @foo() +; CHECK: store i32 1, i32* %local_alloca + store i32 1, i32* %local_alloca + join label %join + +join: ; preds = %cont +; CHECK: %val = load i32, i32* %local_alloca + %val = load i32, i32* %local_alloca + ret i32 %val +} + +; Verify we do not promote allocas even if they are used only inside a parallel +; region but defined outside. 
+define i32 @alloca_used_only_in_par_code() { +entry: +; CHECK: alloca i32 + %local_alloca = alloca i32 + fork label %forked, %cont + +forked: ; preds = %entry + store i32 0, i32* %local_alloca + call void @foo() + halt label %cont + +cont: ; preds = %entry, %forked + call void @foo() + store i32 1, i32* %local_alloca + %val = load i32, i32* %local_alloca + join label %join + +join: ; preds = %cont +; CHECK: ret i32 %val + ret i32 %val +} + +; Verify we do promote allocas that are used only outside a parallel region. +define i32 @alloca_used_only_in_seq_code() { +entry: +; CHECK-NOT: alloca i32 + %local_alloca = alloca i32 + store i32 0, i32* %local_alloca + fork label %forked, %cont + +forked: ; preds = %entry + call void @foo() + halt label %cont + +cont: ; preds = %entry, %forked + call void @foo() + %val = load i32, i32* %local_alloca + join label %join + +join: ; preds = %cont + store i32 1, i32* %local_alloca +; CHECK: ret i32 0 + ret i32 %val +}