Index: include/polly/Support/GICHelper.h =================================================================== --- include/polly/Support/GICHelper.h +++ include/polly/Support/GICHelper.h @@ -15,6 +15,7 @@ #define POLLY_SUPPORT_GIC_HELPER_H #include "llvm/ADT/APInt.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/raw_ostream.h" #include "isl/aff.h" #include "isl/ctx.h" @@ -319,6 +320,13 @@ return OS; } +template +llvm::DiagnosticInfoOptimizationBase & +operator<<(llvm::DiagnosticInfoOptimizationBase &OS, const IslPtr &Obj) { + OS << IslObjTraits::to_str(Obj.keep()); + return OS; +} + /// Smart pointer to an ISL object, but does not release it when destroyed. /// /// This is meant to be used as function parameter type. The caller guarantees Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -133,6 +133,7 @@ #include "polly/ScopPass.h" #include "polly/Support/ISLTools.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #define DEBUG_TYPE "polly-delicm" using namespace polly; @@ -641,6 +642,18 @@ } }; +std::string printIntruction(Instruction *Instr, + bool IsForDebug = false) { + std::string Result; + raw_string_ostream OS(Result); + Instr->print(OS, IsForDebug); + OS.flush(); + size_t i = 0; + while (i < Result.size() && Result[i] == ' ') + i += 1; + return Result.substr(i); +} + /// Base class for algorithms based on zones, like DeLICM. class ZoneAlgorithm { protected: @@ -685,9 +698,14 @@ /// { DomainMustWrite[] -> Element[] } IslPtr AllMustWrites; + /// Used to emit optimization diagnostics (rejected scops, mapped scalars, + /// etc.) + OptimizationRemarkEmitter *ORE; + /// Prepare the object before computing the zones of @p S. - ZoneAlgorithm(Scop *S) - : IslCtx(S->getSharedIslCtx()), S(S), Schedule(give(S->getSchedule())) { + ZoneAlgorithm(Scop *S, OptimizationRemarkEmitter *ORE) + : IslCtx(S->getSharedIslCtx()), S(S), Schedule(give(S->getSchedule())), + ORE(ORE) { auto Domains = give(S->getDomains()); @@ -724,8 +742,15 @@ if (MA->isRead()) { // Reject store after load to same location. - if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) { + OptimizationRemarkMissed R(DEBUG_TYPE, "LoadAfterStore", + MA->getAccessInstruction()); + R << "load after store of same element in same statement"; + R << " (previous stores: " << Stores; + R << ", loading: " << AccRel << ")"; + ORE->emit(R); return false; + } Loads = give(isl_union_map_union(Loads.take(), AccRel.take())); @@ -734,18 +759,35 @@ if (!isa(MA->getAccessInstruction())) { DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n"); + OptimizationRemarkMissed R(DEBUG_TYPE, "UnusualStore", + MA->getAccessInstruction()); + R << "encountered write that is not a StoreInst: " + << printIntruction(MA->getAccessInstruction()); + ORE->emit(R); return false; } // In region statements the order is less clear, eg. the load and store // might be in a boxed loop. if (Stmt->isRegionStmt() && - !isl_union_map_is_disjoint(Loads.keep(), AccRel.keep())) + !isl_union_map_is_disjoint(Loads.keep(), AccRel.keep())) { + OptimizationRemarkMissed R(DEBUG_TYPE, "StoreInSubregion", + MA->getAccessInstruction()); + R << "store is in a non-affine subregion"; + ORE->emit(R); return false; + } // Do not allow more than one store to the same location. - if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) { + OptimizationRemarkMissed R(DEBUG_TYPE, "StoreAfterStore", + MA->getAccessInstruction()); + R << "store after store of same element in same statement"; + R << " (previous stores: " << Stores; + R << ", storing: " << AccRel << ")"; + ORE->emit(R); return false; + } Stores = give(isl_union_map_union(Stores.take(), AccRel.take())); } @@ -1510,7 +1552,7 @@ } public: - DeLICMImpl(Scop *S) : ZoneAlgorithm(S) {} + DeLICMImpl(Scop *S, OptimizationRemarkEmitter *ORE) : ZoneAlgorithm(S, ORE) {} /// Calculate the lifetime (definition to last use) of every array element. /// @@ -1537,6 +1579,12 @@ if (isl_ctx_last_error(IslCtx.get()) == isl_error_quota) { DeLICMOutOfQuota++; DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); + DebugLoc Begin, End; + getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); + OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, + S->getEntry()); + R << "maximal number of operations exceeded during zone analysis"; + ORE->emit(R); } DeLICMAnalyzed++; @@ -1611,7 +1659,8 @@ std::unique_ptr Impl; void collapseToUnused(Scop &S) { - Impl = make_unique(&S); + Impl = make_unique( + &S, &getAnalysis().getORE()); if (!Impl->computeZone()) { DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); @@ -1631,6 +1680,7 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive(); + AU.addRequired(); AU.setPreservesAll(); } Index: test/DeLICM/reject_loadafterstore.ll =================================================================== --- /dev/null +++ test/DeLICM/reject_loadafterstore.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + %dummy = load double, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: load after store of same element in same statement Index: test/DeLICM/reject_outofquota.ll =================================================================== --- /dev/null +++ test/DeLICM/reject_outofquota.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-analysis=polly-delicm -polly-delicm-max-ops=1 < %s 2>&1 | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: maximal number of operations exceeded during zone analysis Index: test/DeLICM/reject_storeafterstore.ll =================================================================== --- /dev/null +++ test/DeLICM/reject_storeafterstore.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double 0.0, double* %A_idx + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: store after store of same element in same statement Index: test/DeLICM/reject_storeinsubregion.ll =================================================================== --- /dev/null +++ test/DeLICM/reject_storeinsubregion.ll @@ -0,0 +1,75 @@ +; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + %A_idxp = getelementptr inbounds double, double* %A, i32 %j + %add.cmp = fcmp ogt double %add, 42.0 + br i1 %add.cmp , label %body_true, label %body_false + + body_true: + %dummy = load double, double* %A_idxp + br label %reduction.inc + + body_false: + store double 0.0, double* %A_idxp + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: store is in a non-affine subregion Index: test/DeLICM/reject_unusualstore.ll =================================================================== --- /dev/null +++ test/DeLICM/reject_unusualstore.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + %tmp = bitcast double* %A_idx to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false) + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + store double %phi, double* %A_idx + %dummy = load double, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: encountered write that is not a StoreInst: call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false)