Index: polly/trunk/include/polly/CodeGen/BlockGenerators.h =================================================================== --- polly/trunk/include/polly/CodeGen/BlockGenerators.h +++ polly/trunk/include/polly/CodeGen/BlockGenerators.h @@ -329,6 +329,25 @@ ValueMapT &BBMap, __isl_keep isl_id_to_ast_expr *NewAccesses); + /// When statement tracing is enabled, build the print instructions for + /// printing the current statement instance and its input scalars. + /// + /// The printed output looks like: + /// + /// Stmt1(0) + /// + /// If printing of scalars is enabled, it also appends the value of each + /// scalar to the line: + /// + /// Stmt1(0) %i=1 %sum=5 + /// + /// @param Stmt The statement we generate code for. + /// @param LTS A mapping from loops virtual canonical induction + /// variable to their new values. + /// @param BBMap A mapping from old values to their new values in this block. + void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS, + ValueMapT &BBMap); + /// Generate instructions that compute whether one instance of @p Set is /// executed. /// Index: polly/trunk/include/polly/CodeGen/RuntimeDebugBuilder.h =================================================================== --- polly/trunk/include/polly/CodeGen/RuntimeDebugBuilder.h +++ polly/trunk/include/polly/CodeGen/RuntimeDebugBuilder.h @@ -30,6 +30,28 @@ /// run time. struct RuntimeDebugBuilder { + /// Generate a constant string into the builder's llvm::Module which can be + /// passed to createCPUPrinter() or createGPUPrinter(). + /// + /// @param Builder The builder used to emit the printer calls. + /// @param Str The string to be printed. + /// + /// @return A global containing @p Str. + static llvm::Value *getPrintableString(PollyIRBuilder &Builder, + llvm::StringRef Str) { + // TODO: Get rid of magic number 4. It is NVPTX's constant address space and + // works on X86 (CPU) only because its backend ignores the address space. 
+ return Builder.CreateGlobalStringPtr(Str, "", 4); + } + + /// Return whether an llvm::Value of the type @p Ty is printable for + /// debugging. + /// + /// That is, whether such a value can be passed to createCPUPrinter() or + /// createGPUPrinter() to be dumped at runtime. If false is returned, those + /// functions will fail. + static bool isPrintable(llvm::Type *Ty); + /// Print a set of LLVM-IR Values or StringRefs via printf /// /// This function emits a call to printf that will print the given arguments. @@ -78,7 +100,7 @@ static void createPrinter(PollyIRBuilder &Builder, bool UseGPU, std::vector &Values, llvm::StringRef String, Args... args) { - Values.push_back(Builder.CreateGlobalStringPtr(String, "", 4)); + Values.push_back(getPrintableString(Builder, String)); createPrinter(Builder, UseGPU, Values, args...); } Index: polly/trunk/lib/CodeGen/BlockGenerators.cpp =================================================================== --- polly/trunk/lib/CodeGen/BlockGenerators.cpp +++ polly/trunk/lib/CodeGen/BlockGenerators.cpp @@ -51,6 +51,17 @@ cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt TraceStmts( + "polly-codegen-trace-stmts", + cl::desc("Add printf calls that print the statement being executed"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + +static cl::opt TraceScalars( + "polly-codegen-trace-scalars", + cl::desc("Add printf calls that print the values of all scalar values " + "used in a statement. 
Requires -polly-codegen-trace-stmts."), + cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + BlockGenerator::BlockGenerator( PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap, @@ -436,6 +447,7 @@ BasicBlock *CopyBB = splitBB(BB); Builder.SetInsertPoint(&CopyBB->front()); generateScalarLoads(Stmt, LTS, BBMap, NewAccesses); + generateBeginStmtTrace(Stmt, LTS, BBMap); copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses); @@ -647,6 +659,108 @@ Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt()); } +static std::string getInstName(Value *Val) { + std::string Result; + raw_string_ostream OS(Result); + Val->printAsOperand(OS, false); + return OS.str(); +} + +void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS, + ValueMapT &BBMap) { + if (!TraceStmts) + return; + + Scop *S = Stmt.getParent(); + const char *BaseName = Stmt.getBaseName(); + + isl::ast_build AstBuild = Stmt.getAstBuild(); + isl::set Domain = Stmt.getDomain(); + + isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain); + isl::map Schedule = isl::map::from_union_map(USchedule); + assert(Schedule.is_empty().is_false() && + "The stmt must have a valid instance"); + + isl::multi_pw_aff ScheduleMultiPwAff = + isl::pw_multi_aff::from_map(Schedule.reverse()); + isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range()); + + // Sequence of strings to print. + SmallVector Values; + + // Print the name of the statement. + // TODO: Indent by the depth of the statement instance in the schedule tree. + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName)); + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "(")); + + // Add the coordinate of the statement instance. 
+ int DomDims = ScheduleMultiPwAff.dim(isl::dim::out); + for (int i = 0; i < DomDims; i += 1) { + if (i > 0) + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ",")); + + isl::ast_expr IsInSet = + RestrictedBuild.expr_from(ScheduleMultiPwAff.get_pw_aff(i)); + Values.push_back(ExprBuilder->create(IsInSet.copy())); + } + + if (TraceScalars) { + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")")); + DenseSet Encountered; + + // Add the value of each scalar (and the result of PHIs) used in the + // statement. + // TODO: Values used in region-statements. + for (Instruction *Inst : Stmt.insts()) { + if (!RuntimeDebugBuilder::isPrintable(Inst->getType())) + continue; + + if (isa(Inst)) { + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " ")); + Values.push_back(RuntimeDebugBuilder::getPrintableString( + Builder, getInstName(Inst))); + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "=")); + Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS, + LI.getLoopFor(Inst->getParent()))); + } else { + for (Value *Op : Inst->operand_values()) { + // Do not print values that cannot change during the execution of the + // SCoP. + auto *OpInst = dyn_cast(Op); + if (!OpInst) + continue; + if (!S->contains(OpInst)) + continue; + + // Print each scalar at most once, and exclude values defined in the + // statement itself. 
+ if (Encountered.count(OpInst)) + continue; + + Values.push_back( + RuntimeDebugBuilder::getPrintableString(Builder, " ")); + Values.push_back(RuntimeDebugBuilder::getPrintableString( + Builder, getInstName(OpInst))); + Values.push_back( + RuntimeDebugBuilder::getPrintableString(Builder, "=")); + Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS, + LI.getLoopFor(Inst->getParent()))); + Encountered.insert(OpInst); + } + } + + Encountered.insert(Inst); + } + + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n")); + } else { + Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n")); + } + + RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef(Values)); +} + void BlockGenerator::generateScalarStores( ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap, __isl_keep isl_id_to_ast_expr *NewAccesses) { @@ -1375,6 +1489,7 @@ ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy]; generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp); + generateBeginStmtTrace(Stmt, LTS, EntryBBMap); for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) { Index: polly/trunk/lib/CodeGen/RuntimeDebugBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/RuntimeDebugBuilder.cpp +++ polly/trunk/lib/CodeGen/RuntimeDebugBuilder.cpp @@ -103,6 +103,19 @@ createCPUPrinterT(Builder, Values); } +bool RuntimeDebugBuilder::isPrintable(Type *Ty) { + if (Ty->isFloatingPointTy()) + return true; + + if (Ty->isIntegerTy()) + return Ty->getIntegerBitWidth() <= 64; + + if (isa(Ty)) + return true; + + return false; +} + static std::tuple> prepareValuesForPrinting(PollyIRBuilder &Builder, ArrayRef Values) { std::string FormatString; Index: polly/trunk/test/Isl/CodeGen/RuntimeDebugBuilder/stmt_tracing.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/RuntimeDebugBuilder/stmt_tracing.ll +++ 
polly/trunk/test/Isl/CodeGen/RuntimeDebugBuilder/stmt_tracing.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-codegen-trace-stmts -polly-codegen-trace-scalars -polly-codegen -S < %s | FileCheck %s +; + +define void @func(i32 %n, double* %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double 0.0, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: @0 = private unnamed_addr addrspace(4) constant [10 x i8] c"Stmt_body\00" +; CHECK: @1 = private unnamed_addr addrspace(4) constant [2 x i8] c"(\00" +; CHECK: @2 = private unnamed_addr addrspace(4) constant [2 x i8] c")\00" +; CHECK: @3 = private unnamed_addr addrspace(4) constant [2 x i8] c"\0A\00" +; CHECK: @4 = private unnamed_addr constant [12 x i8] c"%s%s%ld%s%s\00" + +; CHECK: polly.stmt.body: +; CHECK: call i32 (...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @4, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([10 x i8], [10 x i8] addrspace(4)* @0, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @1, i32 0, i32 0), i64 %polly.indvar, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @2, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @3, i32 0, i32 0)) +; CHECK-NEXT: call i32 @fflush(i8* null)