Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -55,6 +55,7 @@ llvm::Pass *createIslScheduleOptimizerPass(); llvm::Pass *createFlattenSchedulePass(); llvm::Pass *createDeLICMPass(); +llvm::Pass *createMaximalStaticExpansionPass(); extern char &CodePreparationID; } // namespace polly @@ -88,6 +89,7 @@ polly::createPPCGCodeGenerationPass(); #endif polly::createIslScheduleOptimizerPass(); + polly::createMaximalStaticExpansionPass(); polly::createFlattenSchedulePass(); polly::createDeLICMPass(); polly::createDumpModulePass("", true); @@ -109,6 +111,7 @@ void initializePPCGCodeGenerationPass(llvm::PassRegistry &); #endif void initializeIslScheduleOptimizerPass(llvm::PassRegistry &); +void initializeMaximalStaticExpanderPass(llvm::PassRegistry &); void initializePollyCanonicalizePass(llvm::PassRegistry &); void initializeFlattenSchedulePass(llvm::PassRegistry &); void initializeDeLICMPass(llvm::PassRegistry &); Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -654,9 +654,6 @@ /// @param ElementSize The size of one element accessed. void computeBoundsOnAccessRelation(unsigned ElementSize); - /// Get the original access function as read from IR. - __isl_give isl_map *getOriginalAccessRelation() const; - /// Return the space in which the access relation lives in. __isl_give isl_space *getOriginalAccessRelationSpace() const; @@ -840,6 +837,9 @@ /// Get an isl string representing the access function read from IR. std::string getOriginalAccessRelationStr() const; + /// Get the original access function as read from IR. + __isl_give isl_map *getOriginalAccessRelation() const; + /// Get an isl string representing a new access function, if available. 
std::string getNewAccessRelationStr() const;

Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -60,6 +60,7 @@
     Transform/FlattenAlgo.cpp
     Transform/DeLICM.cpp
     Transform/Simplify.cpp
+    Transform/MaximalStaticExpansion.cpp
     ${POLLY_HEADER_FILES}
     )
 set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -140,6 +140,11 @@
     cl::desc("Import the polyhedral description of the detected Scops"),
     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

+static cl::opt<bool> FullyIndexedStaticExpansion(
+    "polly-mse",
+    cl::desc("Fully expand the memory accesses of the detected Scops"),
+    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 static cl::opt<bool> ExportJScop(
     "polly-export",
     cl::desc("Export the polyhedral description of the detected Scops"),
@@ -237,6 +242,7 @@
   initializeDependenceInfoWrapperPassPass(Registry);
   initializeJSONExporterPass(Registry);
   initializeJSONImporterPass(Registry);
+  initializeMaximalStaticExpanderPass(Registry);
   initializeIslAstInfoWrapperPassPass(Registry);
   initializeIslScheduleOptimizerPass(Registry);
   initializePollyCanonicalizePass(Registry);
@@ -313,6 +319,9 @@
   if (DeadCodeElim)
     PM.add(polly::createDeadCodeElimPass());

+  if (FullyIndexedStaticExpansion)
+    PM.add(polly::createMaximalStaticExpansionPass());
+
   if (EnablePruneUnprofitable)
     PM.add(polly::createPruneUnprofitablePass());

Index: lib/Transform/MaximalStaticExpansion.cpp
===================================================================
--- /dev/null
+++ lib/Transform/MaximalStaticExpansion.cpp
@@ -0,0 +1,354 @@
+//===---------------- MaximalStaticExpansion.cpp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass fully expands the memory accesses of a Scop to get rid of
+// dependencies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/DependenceInfo.h"
+#include "polly/FlattenAlgo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/GICHelper.h"
+#include "polly/Support/ISLOStream.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace polly;
+
+namespace {
+/// Scop pass that performs a fully-indexed static expansion of array accesses.
+///
+/// Every expandable array is given a new array indexed by the iteration
+/// vector of its single write, so that each write goes to a different memory
+/// cell; the reads are then redirected using the RAW dependences.
+class MaximalStaticExpander : public ScopPass {
+public:
+  static char ID;
+  explicit MaximalStaticExpander() : ScopPass(ID) {}
+
+  ~MaximalStaticExpander() {}
+
+  /// Expand the accesses of the SCoP @p S.
+  bool runOnScop(Scop &S) override;
+
+  /// Print the SCoP @p S.
+  void printScop(raw_ostream &OS, Scop &S) const override;
+
+  /// Register all analyses and transformations required.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  /// The set of SAIs that cannot be expanded in the SCoP being processed.
+  std::set<const ScopArrayInfo *> NotExpandables;
+
+  /// Determine which SAIs of the SCoP @p S are expandable and record the
+  /// others in NotExpandables.
+  ///
+  /// @param S           The SCoP to inspect.
+  /// @param Dependences The RAW dependences of the SCoP @p S.
+  void checkExpendability(Scop &S, isl::union_map &Dependences);
+
+  /// Return true if @p SAI has not been recorded as not expandable.
+  bool isExpandable(const ScopArrayInfo *SAI);
+
+  /// Expand the write memory access @p MA belonging to the SCoP @p S.
+  void expandWrite(Scop &S, MemoryAccess *MA);
+
+  /// Expand the read memory access @p MA belonging to the SCoP @p S.
+  ///
+  /// @param Writes      The write accesses of @p S handed to expandWrite.
+  /// @param Dependences The RAW dependences of the SCoP @p S.
+  void expandRead(Scop &S, MemoryAccess *MA, std::set<MemoryAccess *> &Writes,
+                  isl::union_map &Dependences);
+};
+} // namespace
+
+char MaximalStaticExpander::ID = 0;
+
+bool MaximalStaticExpander::isExpandable(const ScopArrayInfo *SAI) {
+  return NotExpandables.count(SAI) == 0;
+}
+
+void MaximalStaticExpander::checkExpendability(Scop &S,
+                                               isl::union_map &Dependences) {
+  // The reversed RAW dependences relate every read instance to the write
+  // instance(s) it depends on. They are the same for all arrays and accesses,
+  // so compute them only once.
+  auto ReadToWriteDeps = Dependences.reverse();
+
+  for (auto &SAI : S.arrays()) {
+    int NumberWrites = 0;
+    bool NotExpandable = false;
+    for (ScopStmt &Stmt : S) {
+      for (MemoryAccess *MA : Stmt) {
+
+        // Only consider the accesses that involve the current SAI.
+        if (SAI != MA->getLatestScopArrayInfo())
+          continue;
+
+        // For now, we are not able to expand scalars.
+        if (MA->isLatestScalarKind()) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " is a Scalar access. \n";
+          NotExpandable = true;
+        }
+
+        // For now, we are not able to expand MayWrite.
+        if (MA->isMayWrite()) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " has a maywrite access. \n";
+          NotExpandable = true;
+        }
+
+        // For now, we are not able to expand SAI with more than one write.
+        if (MA->isMustWrite()) {
+          NumberWrites++;
+          if (NumberWrites > 1) {
+            errs() << "MSE ERROR : " << SAI->getName()
+                   << " has more than 1 write access. \n";
+            NotExpandable = true;
+          }
+        }
+
+        // The remaining checks only apply to reads.
+        if (!MA->isRead())
+          continue;
+
+        // Get the domain of the current ScopStmt.
+        auto StmtDomain = isl::give(Stmt.getDomain());
+
+        // Restrict the dependences to the ones whose sink is this read.
+        auto ReadDomainSet =
+            isl::give(isl_map_domain(MA->getAccessRelation()));
+        auto ReadDomain = isl::union_set(ReadDomainSet);
+        auto CurrentReadWriteDependences =
+            ReadToWriteDeps.intersect_domain(ReadDomain);
+
+        unsigned NumberElementMap = 0;
+        CurrentReadWriteDependences.foreach_map(
+            [&NumberElementMap](isl::map Map) -> isl::stat {
+              NumberElementMap++;
+              return isl::stat::ok;
+            });
+
+        // Only dependences made of exactly one map are handled for now. Stop
+        // here for this access: the conversion to isl::set below is only
+        // meaningful for a union set living in a single space.
+        if (NumberElementMap != 1) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " has too many dependences to be handle for now. \n";
+          NotExpandable = true;
+          continue;
+        }
+
+        auto DepsDomainSet = isl::set(CurrentReadWriteDependences.domain());
+
+        // Partial read accesses are not handled by Polly.
+        if (!StmtDomain.is_subset(DepsDomainSet)) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " expansion leads to a partial read access. \n";
+          NotExpandable = true;
+        }
+      }
+    }
+
+    // No need to expand SAI with no write.
+    if (NumberWrites == 0) {
+      errs() << "MSE ERROR : " << SAI->getName() << " has 0 write access. \n";
+      NotExpandable = true;
+    }
+    if (NotExpandable)
+      NotExpandables.insert(SAI);
+  }
+}
+
+void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA,
+                                       std::set<MemoryAccess *> &Writes,
+                                       isl::union_map &Dependences) {
+
+  // Get the RAW dependences whose sink is the current read access.
+  auto ReadDomainSet = isl::give(isl_map_domain(MA->getAccessRelation()));
+  auto ReadDomain = isl::union_set(ReadDomainSet);
+
+  auto CurrentReadWriteDependences =
+      Dependences.reverse().intersect_domain(ReadDomain);
+
+  // If no dependences, no need to modify anything.
+  if (CurrentReadWriteDependences.is_empty())
+    return;
+
+  // The reversed dependences relate every read instance to the write instance
+  // it depends on, which is the index used by the expanded array.
+  auto NewAccessMap = isl::map::from_union_map(CurrentReadWriteDependences);
+
+  // Get the id of the array that is read and the id of the writing statement.
+  auto ReadAccessRelation = isl::give(MA->getAccessRelation());
+  auto ReadOutId = ReadAccessRelation.get_tuple_id(isl::dim::out);
+  auto WriterStmtId = NewAccessMap.get_tuple_id(isl::dim::out);
+
+  // Find the id of the corresponding expanded array.
+  isl::id ExpandedArrayId;
+  for (auto Write : Writes) {
+    auto WriteAccessRelation = isl::give(Write->getOriginalAccessRelation());
+    auto WriteOutId = WriteAccessRelation.get_tuple_id(isl::dim::out);
+    auto WriteInId = WriteAccessRelation.get_tuple_id(isl::dim::in);
+
+    if (ReadOutId.keep() == WriteOutId.keep() &&
+        WriterStmtId.keep() == WriteInId.keep()) {
+      auto NewWriteAccessRelation = isl::give(Write->getLatestAccessRelation());
+      ExpandedArrayId = NewWriteAccessRelation.get_tuple_id(isl::dim::out);
+      break;
+    }
+  }
+
+  // Without a matching write there is no expanded array to redirect the read
+  // to. Leave the access untouched instead of installing a relation whose
+  // output tuple id is null.
+  if (ExpandedArrayId.keep() == nullptr)
+    return;
+
+  // Replace the out tuple id with the one of the expanded array.
+  NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, ExpandedArrayId);
+
+  // Set the new access relation.
+  MA->setNewAccessRelation(NewAccessMap.copy());
+}
+
+void MaximalStaticExpander::expandWrite(Scop &S, MemoryAccess *MA) {
+
+  // Get the current access map.
+  auto CurrentAccessMap = isl::give(MA->getAccessRelation());
+
+  // If the access is already fully expanded, do nothing.
+  unsigned in_dimensions = CurrentAccessMap.dim(isl::dim::in);
+  unsigned out_dimensions = CurrentAccessMap.dim(isl::dim::out);
+  if (in_dimensions == out_dimensions)
+    return;
+
+  // Create a new access map from the domain of the current one.
+  auto Domain = CurrentAccessMap.domain();
+  auto NewAccessMap = isl::map::from_domain(Domain);
+
+  // Fully indexed expansion: add one output dimension per input dimension.
+  NewAccessMap = NewAccessMap.add_dims(isl::dim::out, in_dimensions);
+
+  // Create the string representing the name of the new SAI.
+  // One new SAI for each statement so that each write goes to a different
+  // memory cell.
+  auto CurrentStmtDomain = isl::give(MA->getStatement()->getDomain());
+  auto CurrentStmtName = CurrentStmtDomain.get_tuple_name();
+  std::string ExpandedArrayName =
+      MA->getScopArrayInfo()->getName() + "_" + CurrentStmtName + "_expanded";
+
+  // Create the size vector.
+  // For now, use getSE but will use ISL in a future revision.
+  // Waiting for methods from FlattenAlgo to be available.
+  // FIXME: The backedge-taken count is used as-is; a "+1" is still missing, as
+  // noted by the original author, until the isl-based sizes are available.
+  std::vector<const SCEV *> SCEVSizes;
+  auto Stmt = MA->getStatement();
+  for (unsigned i = 0; i < Stmt->getNumIterators(); i++) {
+    auto Loop = Stmt->getLoopForDimension(i);
+    SCEVSizes.push_back(S.getSE()->getMaxBackedgeTakenCount(Loop));
+  }
+
+  // Get the ElementType of the current SAI.
+  auto ElementType = MA->getOriginalScopArrayInfo()->getElementType();
+
+  // Create (or get if already existing) the new expanded SAI.
+  auto ExpandedSAI =
+      S.getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
+                                 MemoryKind::Array, ExpandedArrayName.c_str());
+  ExpandedSAI->setIsOnHeap(true);
+
+  // Make the new access map target the expanded array.
+  auto NewOutId = isl::give(ExpandedSAI->getBasePtrId());
+  NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, NewOutId);
+
+  // Constrain every output dimension to equal the input dimension of the
+  // same index.
+  auto SpaceMap = NewAccessMap.get_space();
+  auto ls = isl::local_space(SpaceMap);
+  for (unsigned dim = 0; dim < in_dimensions; dim++) {
+    auto Constraints = isl::constraint::alloc_equality(ls);
+    Constraints = Constraints.set_coefficient_si(isl::dim::out, dim, 1);
+    Constraints = Constraints.set_coefficient_si(isl::dim::in, dim, -1);
+    NewAccessMap = NewAccessMap.add_constraint(Constraints);
+  }
+
+  // Set the new access relation map.
+  MA->setNewAccessRelation(NewAccessMap.copy());
+}
+
+bool MaximalStaticExpander::runOnScop(Scop &S) {
+
+  // NotExpandables is a member and outlives a single run: drop the entries
+  // recorded for a previously processed SCoP.
+  NotExpandables.clear();
+
+  // Get the RAW Dependences.
+  auto &DI = getAnalysis<DependenceInfo>();
+  auto &D = DI.getDependences(Dependences::AL_Statement);
+  auto RAWDeps = isl::give(D.getDependences(Dependences::TYPE_RAW));
+
+  // Check for each SAI if we can expand it.
+  checkExpendability(S, RAWDeps);
+
+  // The write accesses handed to expandWrite.
+  std::set<MemoryAccess *> Writes;
+
+  // Expand all expandable write MemoryAccesses.
+  for (ScopStmt &Stmt : S) {
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isWrite() || !isExpandable(MA->getLatestScopArrayInfo()))
+        continue;
+
+      expandWrite(S, MA);
+      Writes.insert(MA);
+    }
+  }
+
+  // Expand all expandable read MemoryAccesses.
+  for (ScopStmt &Stmt : S) {
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isRead() || !isExpandable(MA->getLatestScopArrayInfo()))
+        continue;
+
+      expandRead(S, MA, Writes, RAWDeps);
+    }
+  }
+
+  return false;
+}
+
+void MaximalStaticExpander::printScop(raw_ostream &OS, Scop &S) const {
+  S.print(OS);
+}
+
+void MaximalStaticExpander::getAnalysisUsage(AnalysisUsage &AU) const {
+  ScopPass::getAnalysisUsage(AU);
+  AU.addRequired<DependenceInfo>();
+}
+
+Pass *polly::createMaximalStaticExpansionPass() {
+  return new MaximalStaticExpander();
+}
+
+INITIALIZE_PASS_BEGIN(MaximalStaticExpander, "polly-opt-mse", + "Polly - Maximal static expansion of SCoP", false, false); +INITIALIZE_PASS_DEPENDENCY(DependenceInfo); +INITIALIZE_PASS_END(MaximalStaticExpander, "polly-opt-mse", + "Polly - Maximal static expansion of SCoP", false, false) Index: test/MaximalStaticExpansion/partial_access.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/partial_access.ll @@ -0,0 +1,106 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s 2>&1 | FileCheck %s +; +; Verify that Polly detects problems and does not expand the array +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 2; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + store i32 2, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %j, align 
4 + %conv = sitofp i32 %2 to double + %3 = load double*, double** %B.addr, align 8 + %4 = load i32, i32* %j, align 4 + %sub = sub nsw i32 %4, 1 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom + store double %conv, double* %arrayidx, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %5 = load i32, i32* %j, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + %8 = load double, double* %arrayidx5, align 8 + %9 = load double*, double** %A.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + store double %8, double* %arrayidx7, align 8 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %11 = load i32, i32* %i, align 4 + %inc9 = add nsw i32 %11, 1 + store i32 %inc9, i32* %i, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %12 = load double, double* %tmp, align 8 + ret double %12 +} + Index: test/MaximalStaticExpansion/too_many_writes.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/too_many_writes.ll @@ -0,0 +1,113 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s 2>&1 | FileCheck %s +; +; Verify that Polly detects problems and does not expand the array +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; B[i] += 2; +; for (int j = 2; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded + +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + %1 = load double*, double** %B.addr, align 8 + %2 = load i32, i32* %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds double, double* %1, i64 %idxprom + %3 = load double, double* %arrayidx, align 8 + %add = fadd double %3, 2.000000e+00 + store double %add, double* %arrayidx, align 8 + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %4 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %4, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %5 = load i32, i32* %j, align 4 + %conv = sitofp i32 %5 to double + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %j, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + store double %conv, double* %arrayidx5, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %8 = load i32, i32* %j, align 4 + %inc = add nsw i32 %8, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %9 = load double*, double** %B.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + %11 = load 
double, double* %arrayidx7, align 8 + %12 = load double*, double** %A.addr, align 8 + %13 = load i32, i32* %i, align 4 + %idxprom8 = sext i32 %13 to i64 + %arrayidx9 = getelementptr inbounds double, double* %12, i64 %idxprom8 + store double %11, double* %arrayidx9, align 8 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %14 = load i32, i32* %i, align 4 + %inc11 = add nsw i32 %14, 1 + store i32 %inc11, i32* %i, align 4 + br label %for.cond + +for.end12: ; preds = %for.cond + %15 = load double, double* %tmp, align 8 + ret double %15 +} Index: test/MaximalStaticExpansion/working_expansion.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/working_expansion.ll @@ -0,0 +1,101 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 0; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded[i0, i0] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 
4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %j, align 4 + %conv = sitofp i32 %2 to double + %3 = load double*, double** %B.addr, align 8 + %4 = load i32, i32* %j, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom + store double %conv, double* %arrayidx, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %5 = load i32, i32* %j, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + %8 = load double, double* %arrayidx5, align 8 + %9 = load double*, double** %A.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + store double %8, double* %arrayidx7, align 8 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %11 = load i32, i32* %i, align 4 + %inc9 = add nsw i32 %11, 1 + store i32 %inc9, i32* %i, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %12 = load double, double* %tmp, align 8 + ret double %12 +} +