diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -30,6 +30,7 @@
 class LLVMContext;
 class PHINode;
 class SelectInst;
+class TargetTransformInfo;
 class Use;
 
 /// A private "module" namespace for types and utilities used by SROA. These
@@ -65,6 +66,7 @@
   LLVMContext *C = nullptr;
   DominatorTree *DT = nullptr;
   AssumptionCache *AC = nullptr;
+  TargetTransformInfo *TTI = nullptr;
 
   /// Worklist of alloca instructions to simplify.
   ///
@@ -120,7 +122,8 @@
 
   /// Helper used by both the public run method and by the legacy pass.
   PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT,
-                            AssumptionCache &RunAC);
+                            AssumptionCache &RunAC,
+                            TargetTransformInfo &RunTTI);
 
   bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS);
   AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS,
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -2951,6 +2952,12 @@
              DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() ||
          !NewAI.getAllocatedType()->isSingleValueType());
 
+    if (EmitMemCpy &&
+        SliceSize ==
+            DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() &&
+        SliceSize * 8 <= 2 * Pass.TTI->getRegisterBitWidth(true)) // in bits
+      EmitMemCpy = false;
+
     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
     // a no-op.
@@ -4716,11 +4723,13 @@
 }
 
 PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
-                                AssumptionCache &RunAC) {
+                                AssumptionCache &RunAC,
+                                TargetTransformInfo &RunTTI) {
   LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
   C = &F.getContext();
   DT = &RunDT;
   AC = &RunAC;
+  TTI = &RunTTI;
 
   BasicBlock &EntryBB = F.getEntryBlock();
   for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
@@ -4774,7 +4783,8 @@
 
 PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
   return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
-                 AM.getResult<AssumptionAnalysis>(F));
+                 AM.getResult<AssumptionAnalysis>(F),
+                 AM.getResult<TargetIRAnalysis>(F));
 }
 
 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
@@ -4798,13 +4808,15 @@
 
     auto PA = Impl.runImpl(
         F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
-        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F));
     return !PA.areAllPreserved();
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.setPreservesCFG();
   }
@@ -4820,5 +4832,6 @@
                       "Scalar Replacement Of Aggregates", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
                     false, false)