Index: include/polly/LinkAllPasses.h
===================================================================
--- include/polly/LinkAllPasses.h
+++ include/polly/LinkAllPasses.h
@@ -100,7 +100,7 @@
     polly::createFlattenSchedulePass();
     polly::createDeLICMPass();
     polly::createDumpModulePass("", true);
-    polly::createSimplifyPass();
+    polly::createSimplifyPass(0);
     polly::createPruneUnprofitablePass();
   }
 } PollyForcePassLinking; // Force link by creating a global definition.
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -1936,6 +1936,9 @@
   /// Scop constructor; invoked from ScopBuilder::buildScop.
   Scop(Region &R, ScalarEvolution &SE, LoopInfo &LI,
        ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE);
+  /// Return the LoopInfo obtained from this SCoP's SCEVAffinator.
+  LoopInfo *getLI() const { return Affinator.getLI(); }
+
   //@}
 
   /// Initialize this ScopBuilder.
@@ -3018,6 +3021,22 @@
 
   /// Return whether @p Inst has a use outside of this SCoP.
   bool isEscaping(Instruction *Inst);
+
+  /// Counters describing a SCoP; this is the record getStatistics() returns.
+  struct ScopStatistics {
+    int NumAffineLoops = 0;
+    int NumBoxedLoops = 0;
+
+    int NumValueWrites = 0;
+    int NumValueWritesInLoops = 0;
+    int NumPHIWrites = 0;
+    int NumPHIWritesInLoops = 0;
+    int NumSingletonWrites = 0;
+    int NumSingletonWritesInLoops = 0;
+  };
+
+  /// Return a ScopStatistics record for this SCoP.
+  ScopStatistics getStatistics() const;
 };
 
 /// Print Scop scop to raw_ostream OS.
Index: include/polly/Simplify.h
===================================================================
--- include/polly/Simplify.h
+++ include/polly/Simplify.h
@@ -41,7 +41,7 @@
 /// The order in which implicit writes are executed relative to each other is
 /// undefined.
llvm::SmallVector getAccessesInOrder(ScopStmt &Stmt); -llvm::Pass *createSimplifyPass(); +llvm::Pass *createSimplifyPass(int CallNo); } // namespace polly namespace llvm { Index: include/polly/Support/SCEVAffinator.h =================================================================== --- include/polly/Support/SCEVAffinator.h +++ include/polly/Support/SCEVAffinator.h @@ -73,6 +73,8 @@ /// Check an AddRec for the loop @p L is cached. bool hasNSWAddRecForLoop(llvm::Loop *L) const; + llvm::LoopInfo *getLI() const { return &LI; } + private: /// Key to identify cached expressions. using CacheKey = std::pair; Index: lib/Analysis/PruneUnprofitable.cpp =================================================================== --- lib/Analysis/PruneUnprofitable.cpp +++ lib/Analysis/PruneUnprofitable.cpp @@ -26,11 +26,34 @@ STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be " "optimized in a significant way"); +STATISTIC(NumPrunedLoops, "Number of pruned loops"); +STATISTIC(NumPrunedBoxedLoops, "Number of pruned boxed loops"); +STATISTIC(NumPrunedAffineLoops, "Number of pruned affine loops"); + +STATISTIC(NumLoopsInScop, "Number of loops in scops after pruning"); +STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after pruning"); +STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after pruning"); + class PruneUnprofitable : public ScopPass { private: PruneUnprofitable(const PruneUnprofitable &) = delete; const PruneUnprofitable &operator=(const PruneUnprofitable &) = delete; + void updateStatistics(Scop &S, bool Pruned) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + auto ScopStats = S.getStatistics(); + if (Pruned) { + NumPrunedLoops += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops; + NumPrunedBoxedLoops += ScopStats.NumBoxedLoops; + NumPrunedAffineLoops += ScopStats.NumAffineLoops; + } else { + NumLoopsInScop += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops; + NumBoxedLoops += ScopStats.NumBoxedLoops; + NumAffineLoops += 
ScopStats.NumAffineLoops; + } +#endif + } + public: static char ID; explicit PruneUnprofitable() : ScopPass(ID) {} @@ -53,9 +76,12 @@ DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in " "a significant way\n"); ScopsPruned++; + updateStatistics(S, true); S.invalidate(PROFITABLE, DebugLoc()); + return false; } + updateStatistics(S, false); return false; } }; Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -1238,10 +1238,11 @@ int NumLoops = 1; int MaxLoopDepth = 1; - if (auto *TripCountC = dyn_cast(TripCount)) - if (TripCountC->getType()->getScalarSizeInBits() <= 64) - if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) - NumLoops -= 1; + if (MinProfitableTrips > 0) + if (auto *TripCountC = dyn_cast(TripCount)) + if (TripCountC->getType()->getScalarSizeInBits() <= 64) + if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) + NumLoops -= 1; for (auto &SubLoop : *L) { LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -121,7 +121,11 @@ STATISTIC(AssumptionsDelinearization, "Number of delinearization assumptions taken."); +STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo"); STATISTIC(NumLoopsInScop, "Number of loops in scops"); +STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo"); +STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo"); + STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); @@ -131,6 +135,17 @@ "Number of scops with maximal loop depth 6 and larger"); 
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); +STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo"); +STATISTIC( + NumValueWritesInLoops, + "Number of scalar value writes nested in affine loops after ScopInfo"); +STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo"); +STATISTIC(NumPHIWritesInLoops, + "Number of scalar phi writes nested in affine loops after ScopInfo"); +STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo"); +STATISTIC(NumSingletonWritesInLoops, + "Number of singleton writes nested in affine loops after ScopInfo"); + // The maximal number of basic sets we allow during domain construction to // be created. More complex scops will result in very high compile time and // are also unlikely to result in good code @@ -5160,6 +5175,46 @@ return false; } +Scop::ScopStatistics Scop::getStatistics() const { + auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0); + + ScopStatistics Result; + int NumTotalLoops = LoopStat.NumLoops; + Result.NumBoxedLoops = getBoxedLoops().size(); + Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops; + + for (const ScopStmt &Stmt : *this) { + isl::set Domain = Stmt.getDomain().intersect_params(getContext()); + bool IsInLoop = Stmt.getNumIterators() >= 1; + for (MemoryAccess *MA : Stmt) { + if (!MA->isWrite()) + continue; + + if (MA->isLatestValueKind()) { + Result.NumValueWrites += 1; + if (IsInLoop) + Result.NumValueWritesInLoops += 1; + } + + if (MA->isLatestAnyPHIKind()) { + Result.NumPHIWrites += 1; + if (IsInLoop) + Result.NumPHIWritesInLoops += 1; + } + + isl::set AccSet = + MA->getAccessRelation().intersect_domain(Domain).range(); + if (AccSet.is_singleton()) { + Result.NumSingletonWrites += 1; + if (IsInLoop) + Result.NumSingletonWritesInLoops += 1; + } + } + } + + return Result; +} + raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) { scop.print(OS, PollyPrintInstructions); return OS; @@ -5177,7 
+5232,11 @@ AU.setPreservesAll(); } -void updateLoopCountStatistic(ScopDetection::LoopStats Stats) { +void updateLoopCountStatistic(ScopDetection::LoopStats Stats, + Scop::ScopStatistics ScopStats) { + assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops); + + NumScops += 1; NumLoopsInScop += Stats.NumLoops; MaxNumLoopsInScop = std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); @@ -5194,6 +5253,16 @@ NumScopsDepthFive++; else NumScopsDepthLarger++; + + NumAffineLoops += ScopStats.NumAffineLoops; + NumBoxedLoops += ScopStats.NumBoxedLoops; + + NumValueWrites += ScopStats.NumValueWrites; + NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; + NumPHIWrites += ScopStats.NumPHIWrites; + NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; + NumSingletonWrites += ScopStats.NumSingletonWrites; + NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; } bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) { @@ -5213,11 +5282,13 @@ ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE); S = SB.getScop(); // take ownership of scop object +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) if (S) { ScopDetection::LoopStats Stats = ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0); - updateLoopCountStatistic(Stats); + updateLoopCountStatistic(Stats, S->getStatistics()); } +#endif return false; } @@ -5268,9 +5339,11 @@ std::unique_ptr S = SB.getScop(); if (!S) continue; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) ScopDetection::LoopStats Stats = ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0); - updateLoopCountStatistic(Stats); + updateLoopCountStatistic(Stats, S->getStatistics()); +#endif bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second; assert(Inserted && "Building Scop for the same region twice!"); (void)Inserted; Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ 
lib/CodeGen/CodeGeneration.cpp @@ -56,6 +56,9 @@ cl::location(polly::PerfMonitoring), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +STATISTIC(ScopsProcessed, "Number of SCoP processed"); +STATISTIC(CodegennedScops, "Number of successfully generated SCoPs"); + namespace polly { /// Mark a basic block unreachable. /// @@ -162,6 +165,8 @@ if (!AstRoot) return false; + ScopsProcessed++; + auto &DL = S.getFunction().getParent()->getDataLayout(); Region *R = &S.getRegion(); assert(!R->isTopLevelRegion() && "Top level regions are not supported"); @@ -249,6 +254,8 @@ NodeBuilder.create(AstRoot); NodeBuilder.finalize(); fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI); + + CodegennedScops++; } Function *F = EnteringBB->getParent(); Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -78,6 +78,16 @@ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +STATISTIC(ScopsProcessed, "Number of SCoPs processed"); +STATISTIC(ScopsBeneficial, "Number of beneficial SCoPs"); + +STATISTIC(NumForLoops, "Number of for-loops"); +STATISTIC(NumParallel, "Number of parallel for-loops"); +STATISTIC(NumInnermostParallel, "Number of innermost parallel for-loops"); +STATISTIC(NumOutermostParallel, "Number of outermost parallel for-loops"); +STATISTIC(NumRedunctionParallel, "Number of reduction-parallel for-loops"); +STATISTIC(NumExecutedInParellel, "Number of for-loops executed in parallel"); + namespace polly { /// Temporary information used when building the ast. 
struct AstBuildUserInfo { @@ -401,6 +411,35 @@ return true; } +static void walkAst(__isl_keep isl_ast_node *Ast) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + assert(Ast); + isl_ast_node_foreach_descendant_top_down( + Ast, + [](__isl_keep isl_ast_node *Node, void *User) -> isl_bool { + switch (isl_ast_node_get_type(Node)) { + case isl_ast_node_for: + NumForLoops++; + if (IslAstInfo::isParallel(Node)) + NumParallel++; + if (IslAstInfo::isInnermostParallel(Node)) + NumInnermostParallel++; + if (IslAstInfo::isOutermostParallel(Node)) + NumOutermostParallel++; + if (IslAstInfo::isReductionParallel(Node)) + NumRedunctionParallel++; + if (IslAstInfo::isExecutedInParallel(Node)) + NumExecutedInParellel++; + break; + default: + break; + } + return isl_bool_true; // Continue traversing subrees. + }, + nullptr); +#endif +} + IslAst::IslAst(Scop &Scop) : S(Scop), Root(nullptr), RunCondition(nullptr), Ctx(Scop.getSharedIslCtx()) {} @@ -421,6 +460,8 @@ if (!benefitsFromPolly(S, PerformParallelTest)) return; + ScopsBeneficial++; + isl_ctx *Ctx = S.getIslCtx(); isl_options_set_ast_build_atomic_upper_bound(Ctx, true); isl_options_set_ast_build_detect_min_max(Ctx, true); @@ -455,6 +496,8 @@ Root = isl_ast_build_node_from_schedule(Build, S.getScheduleTree().release()); + walkAst(Root); + isl_ast_build_free(Build); } @@ -692,6 +735,8 @@ if (Scop.isToBeSkipped()) return false; + ScopsProcessed++; + const Dependences &D = getAnalysis().getDependences(Dependences::AL_Statement); Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -328,13 +328,13 @@ PM.add(polly::createPolyhedralInfoPass()); if (EnableSimplify) - PM.add(polly::createSimplifyPass()); + PM.add(polly::createSimplifyPass(0)); if (EnableForwardOpTree) PM.add(polly::createForwardOpTreePass()); if (EnableDeLICM) PM.add(polly::createDeLICMPass()); if (EnableSimplify) - 
PM.add(polly::createSimplifyPass()); + PM.add(polly::createSimplifyPass(1)); if (ImportJScop) PM.add(polly::createJSONImporterPass()); Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -61,6 +61,16 @@ STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); +STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); +STATISTIC(NumValueWritesInLoops, + "Number of scalar value writes nested in affine loops after DeLICM"); +STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); +STATISTIC(NumPHIWritesInLoops, + "Number of scalar phi writes nested in affine loops after DeLICM"); +STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); +STATISTIC(NumSingletonWritesInLoops, + "Number of singleton writes nested in affine loops after DeLICM"); + isl::union_map computeReachingOverwrite(isl::union_map Schedule, isl::union_map Writes, bool InclPrevWrite, @@ -1402,6 +1412,16 @@ collapseToUnused(S); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + auto ScopStats = S.getStatistics(); + NumValueWrites += ScopStats.NumValueWrites; + NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; + NumPHIWrites += ScopStats.NumPHIWrites; + NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; + NumSingletonWrites += ScopStats.NumSingletonWrites; + NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; +#endif + return false; } Index: lib/Transform/ForwardOpTree.cpp =================================================================== --- lib/Transform/ForwardOpTree.cpp +++ lib/Transform/ForwardOpTree.cpp @@ -56,6 +56,16 @@ STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); +STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); +STATISTIC(NumValueWritesInLoops, + "Number of scalar value writes 
nested in affine loops after OpTree"); +STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); +STATISTIC(NumPHIWritesInLoops, + "Number of scalar phi writes nested in affine loops after OpTree"); +STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); +STATISTIC(NumSingletonWritesInLoops, + "Number of singleton writes nested in affine loops after OpTree"); + namespace { /// The state of whether an operand tree was/can be forwarded. @@ -844,6 +854,17 @@ DEBUG(dbgs() << "\nFinal Scop:\n"); DEBUG(dbgs() << S); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + // Update statistics + auto ScopStats = S.getStatistics(); + NumValueWrites += ScopStats.NumValueWrites; + NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; + NumPHIWrites += ScopStats.NumPHIWrites; + NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; + NumSingletonWrites += ScopStats.NumSingletonWrites; + NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; +#endif + return false; } Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -237,6 +237,28 @@ "transformations is applied on the schedule tree"), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +STATISTIC(ScopsProcessed, "Number of scops processed"); +STATISTIC(ScopsRescheduled, "Number of scops optimized"); +STATISTIC(ScopsOptimized, "Number of scops optimized"); + +#define THREE_STATISTICS(VARNAME, DESC) \ + static llvm::Statistic VARNAME[3] = { \ + {DEBUG_TYPE, #VARNAME "0", DESC " (original)", {0}, false}, \ + {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)", {0}, false}, \ + {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)", {0}, false}} + +THREE_STATISTICS(NumBands, "Number of bands"); +THREE_STATISTICS(NumBandMembers, "Number of band members"); +THREE_STATISTICS(NumCoincident, "Number of coincident band members"); 
+THREE_STATISTICS(NumPermutable, "Number of permutable bands");
+
+STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
+STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
+STATISTIC(RegisterTileOpts, "Number of register tiling applied");
+STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
+STATISTIC(MatMulOpts,
+          "Number of matrix multiplication patterns detected and optimized");
+
 /// Create an isl::union_set, which describes the isolate option based on
 /// IsolateDomain.
 ///
@@ -368,6 +390,7 @@
   if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
     Node = Node.parent();
   auto LoopMarker = isl::id::alloc(Node.get_ctx(), "SIMD", nullptr);
+  PrevectOpts++;
   return Node.insert_mark(LoopMarker);
 }
 
@@ -456,17 +479,23 @@
 
 __isl_give isl::schedule_node
 ScheduleTreeOptimizer::standardBandOpts(isl::schedule_node Node, void *User) {
-  if (FirstLevelTiling)
+  if (FirstLevelTiling) {
     Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
                     FirstLevelDefaultTileSize);
+    FirstLevelTileOpts++;
+  }
 
-  if (SecondLevelTiling)
+  if (SecondLevelTiling) {
     Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
                     SecondLevelDefaultTileSize);
+    SecondLevelTileOpts++;
+  }
 
-  if (RegisterTiling)
+  if (RegisterTiling) {
     Node =
         applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
+    RegisterTileOpts++;
+  }
 
   if (PollyVectorizerChoice == VECTORIZER_NONE)
     return Node;
@@ -1235,6 +1264,7 @@
       isMatrMultPattern(isl::manage(isl_schedule_node_copy(Node)), OAI->D,
                         MMI)) {
     DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
+    MatMulOpts++;
     return optimizeMatMulPattern(isl::manage(Node), OAI->TTI, MMI).release();
   }
 
@@ -1308,6 +1338,44 @@
 
 char IslScheduleOptimizer::ID = 0;
 
+/// Collect band statistics of @p Schedule into statistics slot @p Version.
+///
+/// @param Schedule The schedule tree to inspect; a null root is ignored.
+/// @param Version  Index into the THREE_STATISTICS arrays: 0 = original,
+///                 1 = after scheduler, 2 = after optimizer.
+static void walkScheduleTree(isl::schedule Schedule, int Version) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+  auto Root = Schedule.get_root();
+  if (!Root)
+    return;
+
+  // Walk the descendants: an ancestor traversal started at the root would
+  // visit nothing, because the root has no ancestors.
+  isl_schedule_node_foreach_descendant_top_down(
+      Root.get(),
+      [](__isl_keep isl_schedule_node *Node, void *User) -> isl_bool {
+        int Version = *static_cast<int *>(User);
+        if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
+          return isl_bool_true;
+
+        NumBands[Version]++;
+        if (isl_schedule_node_band_get_permutable(Node) == isl_bool_true)
+          NumPermutable[Version]++;
+
+        int CountMembers = isl_schedule_node_band_n_member(Node);
+        NumBandMembers[Version] += CountMembers;
+        for (int i = 0; i < CountMembers; i += 1)
+          if (isl_schedule_node_band_member_get_coincident(Node, i) ==
+              isl_bool_true)
+            NumCoincident[Version]++;
+
+        // Returning true continues the traversal into the children.
+        return isl_bool_true;
+      },
+      &Version);
+#endif
+}
+
 bool IslScheduleOptimizer::runOnScop(Scop &S) {
 
   // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
@@ -1352,6 +1420,9 @@
   if (!Domain)
     return false;
 
+  ScopsProcessed++;
+  walkScheduleTree(S.getScheduleTree(), 0);
+
   isl::union_map Validity = give(D.getDependences(ValidityKinds));
   isl::union_map Proximity = give(D.getDependences(ProximityKinds));
 
@@ -1432,11 +1503,15 @@
   auto Schedule = SC.compute_schedule();
   isl_options_set_on_error(Ctx, OnErrorStatus);
 
+  walkScheduleTree(Schedule, 1);
+
   // In cases the scheduler is not able to optimize the code, we just do not
   // touch the schedule.
   if (!Schedule)
     return false;
 
+  ScopsRescheduled++;
+
   DEBUG({
     auto *P = isl_printer_to_str(Ctx);
     P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
@@ -1451,10 +1526,12 @@
   auto *TTI = &getAnalysis().getTTI(F);
   const OptimizerAdditionalInfoTy OAI = {TTI, const_cast(&D)};
   auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
+  walkScheduleTree(NewSchedule, 2);
 
   if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule))
     return false;
 
+  ScopsOptimized++;
   S.setScheduleTree(NewSchedule.release());
   S.markAsOptimized();
 
Index: lib/Transform/Simplify.cpp
===================================================================
--- lib/Transform/Simplify.cpp
+++ lib/Transform/Simplify.cpp
@@ -27,25 +27,44 @@
 
 namespace {
 
+#define TWO_STATISTICS(VARNAME, DESC) \
+  static llvm::Statistic VARNAME[2] = { \
+      {DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, false}, \
+      {DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, false}}
+
 /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
 /// that the analysis of accesses in a statement is becoming too complex. Chosen
 /// to be relatively small because all the common cases should access only few
 /// array elements per statement.
static int const SimplifyMaxDisjuncts = 4; -STATISTIC(ScopsProcessed, "Number of SCoPs processed"); -STATISTIC(ScopsModified, "Number of SCoPs simplified"); - -STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes"); -STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another"); -STATISTIC(TotalRedundantWritesRemoved, - "Number of writes of same value removed in any SCoP"); -STATISTIC(TotalEmptyPartialAccessesRemoved, - "Number of empty partial accesses removed"); -STATISTIC(TotalDeadAccessesRemoved, "Number of dead accesses removed"); -STATISTIC(TotalDeadInstructionsRemoved, - "Number of unused instructions removed"); -STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP"); +TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed"); +TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified"); + +TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes"); +TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another"); +TWO_STATISTICS(TotalRedundantWritesRemoved, + "Number of writes of same value removed in any SCoP"); +TWO_STATISTICS(TotalEmptyPartialAccessesRemoved, + "Number of empty partial accesses removed"); +TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed"); +TWO_STATISTICS(TotalDeadInstructionsRemoved, + "Number of unused instructions removed"); +TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP"); + +TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify"); +TWO_STATISTICS( + NumValueWritesInLoops, + "Number of scalar value writes nested in affine loops after Simplify"); +TWO_STATISTICS(NumPHIWrites, + "Number of scalar phi writes after the first simplification"); +TWO_STATISTICS( + NumPHIWritesInLoops, + "Number of scalar phi writes nested in affine loops after Simplify"); +TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify"); +TWO_STATISTICS( + 
NumSingletonWritesInLoops, + "Number of singleton writes nested in affine loops after Simplify"); static bool isImplicitRead(MemoryAccess *MA) { return MA->isRead() && MA->isOriginalScalarKind(); @@ -100,6 +119,10 @@ class Simplify : public ScopPass { private: + /// The number of invocation of Simplify to determine which statistics to + /// update. + int CallNo; + /// The last/current SCoP that is/has been processed. Scop *S; @@ -176,7 +199,7 @@ Stmt.removeSingleMemoryAccess(MA); OverwritesRemoved++; - TotalOverwritesRemoved++; + TotalOverwritesRemoved[CallNo]++; } // Unconditional writes overwrite other values. @@ -315,7 +338,7 @@ // We removed MA, OtherMA takes its role. MA = OtherMA; - TotalWritesCoalesced++; + TotalWritesCoalesced[CallNo]++; WritesCoalesced++; // Don't look for more candidates. @@ -437,7 +460,7 @@ Stmt.removeSingleMemoryAccess(MA); RedundantWritesRemoved++; - TotalRedundantWritesRemoved++; + TotalRedundantWritesRemoved[CallNo]++; } } } @@ -476,7 +499,7 @@ StmtsRemoved = NumStmtsBefore - S->getSize(); DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore << ") statements\n"); - TotalStmtsRemoved += StmtsRemoved; + TotalStmtsRemoved[CallNo] += StmtsRemoved; } /// Remove accesses that have an empty domain. @@ -501,7 +524,7 @@ for (MemoryAccess *MA : DeferredRemove) { Stmt.removeSingleMemoryAccess(MA); EmptyPartialAccessesRemoved++; - TotalEmptyPartialAccessesRemoved++; + TotalEmptyPartialAccessesRemoved[CallNo]++; } } } @@ -530,7 +553,7 @@ Stmt->removeSingleMemoryAccess(MA); DeadAccessesRemoved++; - TotalDeadAccessesRemoved++; + TotalDeadAccessesRemoved[CallNo]++; } // Remove all non-reachable instructions. 
@@ -548,7 +571,7 @@
         DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
               dbgs() << " because it is not used\n");
         DeadInstructionsRemoved++;
-        TotalDeadInstructionsRemoved++;
+        TotalDeadInstructionsRemoved[CallNo]++;
         continue;
       }
 
@@ -595,7 +618,9 @@
 
 public:
   static char ID;
-  explicit Simplify() : ScopPass(ID) {}
+  /// @param CallNo Index of the statistics entry this instance updates;
+  ///               defaults to 0.
+  explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequiredTransitive();
@@ -610,7 +635,7 @@
 
     // Prepare processing of this SCoP.
     this->S = &S;
-    ScopsProcessed++;
+    ScopsProcessed[CallNo]++;
 
     DEBUG(dbgs() << "Removing partial writes that never happen...\n");
     removeEmptyPartialAccesses();
@@ -632,10 +657,20 @@
     removeUnnecessaryStmts();
 
     if (isModified())
-      ScopsModified++;
+      ScopsModified[CallNo]++;
     DEBUG(dbgs() << "\nFinal Scop:\n");
     DEBUG(dbgs() << S);
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+    auto ScopStats = S.getStatistics();
+    NumValueWrites[CallNo] += ScopStats.NumValueWrites;
+    NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
+    NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
+    NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
+    NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
+    NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
+#endif
+
     return false;
   }
 
@@ -688,7 +723,8 @@
 }
 } // namespace polly
 
-Pass *polly::createSimplifyPass() { return new Simplify(); }
+/// Create a Simplify pass that updates the statistics entry @p CallNo.
+Pass *polly::createSimplifyPass(int CallNo) { return new Simplify(CallNo); }
 
 INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false,
                       false)