Index: include/polly/DeLICM.h =================================================================== --- /dev/null +++ include/polly/DeLICM.h @@ -0,0 +1,264 @@ +//===------ DeLICM.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Undo the effect of Loop Invariant Code Motion (LICM) and +// GVN Partial Redundancy Elimination (PRE) on SCoP-level. +// +// Namely, remove register/scalar dependencies by mapping them back to array +// elements. +// +//===----------------------------------------------------------------------===// + +#ifndef POLLY_DELICM_H +#define POLLY_DELICM_H + +#include "polly/Support/GICHelper.h" + +namespace llvm { +class PassRegistry; +class Pass; +} // namespace llvm + +namespace polly { +class Scop; + +/// Compute the reaching definition statement for each definition of an array +/// element. +/// +/// The reaching definition of an array element at a specific timepoint is the +/// statement instance that had written the current element's content. This +/// function computes all reaching definitions for all array elements and +/// timepoints. For example: +/// +/// Schedule := { Write[] -> [0]; Overwrite[] -> [10] } +/// Writes := { Write[] -> A[5]; Overwrite[] -> A[5] } +/// +/// That is, index 5 of array A will be written to at timepoint 0 and 10. The +/// result will be: +/// +/// { [A[5] -> [i]] -> Write[] : 0 < i < 10; +/// [A[5] -> [i]] -> Overwrite[] : 10 < i } +/// +/// That is, between timepoint 0 (Write[]) and timepoint 10 (Overwrite[]), the +/// content of A[5] was written by statement instance Write[] and after +/// timepoint 10 by Overwrite[]. Values not defined in the map have no known +/// definition.
This includes the statements instance timepoints themselves, +/// because reads in those timepoints could either read the old or the new +/// value, defined only by the statement itself. But this can be changed by @p +/// InclDef and @p InclRedef. InclDef=false and InclRedef=true will return a +/// zone. Unless @p InclDef and @p InclRedef are both true, there is only one +/// unique definition per element and timepoint. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all array writes. Instances not in +/// @p Writes will be ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param InclDef Whether to include the definition's timepoint as where the +/// element is well-defined (any load at that timepoint would +/// happen after the write). In the example, enabling this adds +/// { [A[5] -> [0]] -> Write[]; [A[5] -> [10]] -> Overwrite[] } +/// to the result. +/// @param InclRedef Whether to assume that at the timepoint where an element +/// is overwritten, it still contains the old value (any load +/// at +/// that timepoint would happen before the overwrite). In this +/// example, enabling this adds +/// { [A[5] -> [10]] -> Write[] } to the result. +/// +/// @return { [Element[] -> Scatter[]] -> DomainWrite[] } +/// The reaching definitions as described above, or nullptr if either @p +/// Schedule or @p Writes is nullptr, or the ISL max operations count +/// has been exceeded. +IslPtr computeReachingDefinition(IslPtr Schedule, + IslPtr Writes, + bool InclDef, bool InclRedef); + +/// Compute the timepoints from a write to its (last) use. +/// +/// Example: +/// Schedule := { Def[] -> [0]; Read[] -> [10]; } +/// Writes := { Def[] -> A[5] } +/// Reads := { Read[] -> A[5] } +/// +/// Result: +/// { [A[5] -> Def[]] -> [i] : 0 < i < 10 } +/// +/// Note: Lifetimes are expressed in terms of the preceding write. Hence, reads +/// before the first write cannot be expressed by this function.
+/// +/// @param Schedule { Domain[] -> Scatter[] } +/// The schedule of (at least) all statement instances +/// occurring in @p Writes or @p Reads. All other +/// instances will be ignored. +/// @param Writes { DomainDef[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reads { DomainRead[] -> Element[] } +/// Elements read from by the statement instances. +/// @param ReadEltInSameInst Whether a load will read the value from a write +/// that is scheduled at the same timepoint (Writes +/// happen before reads). +/// @param InclWrite Whether to also include the timepoint where a value +/// is written to the lifetime. If enabled for the example, it changes to { +/// [A[5] -> Def[]] -> [i] : 0 <= i < 10 }. +/// @param InclLastRead Whether to also include the timepoint where with +/// the last use to the lifetime. If enabled for the example, it changes to { +/// [A[5] -> Def[]] -> [i] : 0 < i <= 10 }. +/// @param ExitReads Whether to extend the lifetimes that are not +/// overwritten into infinity. This corresponds to the assumption that the +/// values must be available after the scop. If enabled, the example changes to +/// { [A[5] -> Def[]] -> [i] : 0 < i } +/// +/// @return { [Element[] -> DomainWrite[]] -> Zone[] } +IslPtr computeArrayLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclWrite, bool InclLastRead, + bool ExitReads); + +/// Compute the next overwrite for each array element. +/// +/// This is computeReachingDefinition() "in reverse"; Instead of looking to the +/// most recent write to an element, look for the next (over)write. 
For example: +/// +/// Schedule := { Write[] -> [0]; Overwrite[] -> [10] } +/// Writes := { Write[] -> A[5]; Overwrite[] -> A[5] } +/// +/// will result in: +/// +/// { [A[5] -> [i]] -> Write[] : i < 0; +/// [A[5] -> [i]] -> Overwrite[] : 0 < i < 10 } +/// +/// That is, A[5] will be overwritten next by Write[] when before timepoint 0, +/// or by Overwrite[] when between timepoints 0 and 10. Use InclPrevWrite=false +/// and InclOverwrite=true to interpret the result as a Zone. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all array writes. Instances not +/// in @p Writes will be ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param InclPrevWrite Whether to extend an overwrite's timepoints to include +/// the timepoint where the previous write happens (the +/// previous write would happen at the beginning of its +/// timepoint). In this example, this adds +/// { [A[5] -> [0]] -> Overwrite[] } to the result. +/// @param InclOverwrite Whether the reaching overwrite includes the timepoint +/// of the overwrite itself (so the overwrite would happen +/// at the end of its timepoint). In the example, this +/// adds +/// { [A[5] -> [0]] -> Write[]; +/// [A[5] -> [10]] -> Overwrite[] } +/// to the result. +/// +/// @return { [Element[] -> Scatter[]] -> DomainWrite[] } +/// The reaching overwrite as defined above, or nullptr if either @p +/// Schedule or @p Writes is nullptr, or the ISL max operations count +/// has been exceeded. +IslPtr computeReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, + bool InclOverwrite); + +/// Compute the timepoints where the contents of an array element are not used. +/// +/// An element is unused at a timepoint when the element will be overwritten in +/// the future, but it is not read in between.
Another way to express this: the +/// time from when the element is written, to the most recent read before it, or +/// infinitely into the past if there is no read before. Such unused elements can +/// be overwritten by any value without changing the Scop's semantics. An +/// example: +/// +/// Schedule := { Read[] -> [0]; Write[] -> [10]; Def[] -> [20] } +/// Writes := { Write[] -> A[5]; Def[] -> A[6] } +/// Reads := { Read[] -> A[5] } +/// +/// The result will be: +/// +/// { A[5] -> [i] : 0 < i < 10; +/// A[6] -> [i] : i < 20 } +/// +/// That is, A[5] is unused between timepoint 0 (the read) and timepoint 10 (the +/// write). A[6] is unused before timepoint 20, but might be used after the +/// scop's execution (A[5] and any other A[i] as well). Use InclLastRead=false +/// and InclWrite=true to interpret the result as a zone. +/// +/// @param Schedule { Domain[] -> Scatter[] } +/// The schedule of (at least) all statement instances +/// occurring in @p Writes or @p Reads. All other +/// instances will be ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reads { DomainRead[] -> Element[] } +/// Elements read from by the statement instances. +/// @param ReadEltInSameInst Whether a load will read the value from a write +/// that is scheduled at the same timepoint (Writes +/// happen before reads). Otherwise, loads will use the +/// value of an element it had before the timepoint +/// (Reads before writes). For example: +/// { Read[] -> [0]; Write[] -> [0] } +/// With ReadEltInSameInst=false it is assumed that the +/// read happens before the write, such that the +/// element is never unused, or just at timepoint 0, +/// depending on InclLastRead/InclWrite. +/// With ReadEltInSameInst=true it assumes that the +/// value just written will be used. Anything before +/// timepoint 0 will be considered unused.
+/// @param InclLastRead Whether the timepoint where an element is last read +/// counts as unused (the read happens at the beginning +/// of its timepoint, and nothing (else) can use it +/// during the timepoint). In the example, this option +/// adds { A[5] -> [0] } to the result. +/// @param InclWrite Whether the timepoint where an element is written +/// itself counts as unused (the write happens at the +/// end of its timepoint; no (other) operation uses +/// the element during the timepoint). In this example, +/// this adds +/// { A[5] -> [10]; A[6] -> [20] } to the result. +/// +/// @return { Element[] -> Scatter[] } +/// The unused timepoints as defined above, or nullptr if either @p +/// Schedule, @p Writes or @p Reads is nullptr, or the ISL max +/// operations count is exceeded. +IslPtr computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclLastRead, bool InclWrite); + +/// Determine whether two lifetimes are conflicting. +/// +/// Used for unit testing. +/// +/// @param ExistingLifetime { [Element[] -> Zone[]] -> ValInst[] } +/// @param ExistingImplicitLifetimeIsUnknown +/// @param ExistingWrites { [Element[] -> Scatter[]] -> ValInst[] } +/// @param ProposedLifetime { [Element[] -> Zone[]] -> ValInst[] } +/// @param ProposedImplicitLifetimeIsUnknown +/// @param ProposedWrites { [Element[] -> Scatter[]] -> ValInst[] } +/// +/// @return False, iff the lifetimes and writes can be merged.
+/// +/// @see polly::Knowledge +bool isConflicting(IslPtr ExistingLifetime, + bool ExistingImplicitLifetimeIsUnknown, + IslPtr ExistingWrites, + IslPtr ProposedLifetime, + bool ProposedImplicitLifetimeIsUnknown, + IslPtr ProposedWrites); + +llvm::Pass *createDeLICMPass(); +} // namespace polly + +namespace llvm { +void initializeDeLICMPass(llvm::PassRegistry &); +} // namespace llvm + +#endif /* POLLY_DELICM_H */ Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -48,6 +48,7 @@ #endif llvm::Pass *createIslScheduleOptimizerPass(); llvm::Pass *createFlattenSchedulePass(); +llvm::Pass *createDeLICMPass(); extern char &CodePreparationID; } // namespace polly @@ -82,6 +83,7 @@ #endif polly::createIslScheduleOptimizerPass(); polly::createFlattenSchedulePass(); + polly::createDeLICMPass(); } } PollyForcePassLinking; // Force link by creating a global definition. } // namespace @@ -100,6 +102,7 @@ void initializeIslScheduleOptimizerPass(llvm::PassRegistry &); void initializePollyCanonicalizePass(llvm::PassRegistry &); void initializeFlattenSchedulePass(llvm::PassRegistry &); +void initializeDeLICMPass(llvm::PassRegistry &); } // namespace llvm #endif Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1037,6 +1037,10 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemoryAccess::ReductionType RT); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const ScopArrayInfo &SAI); + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const MemoryAccess &MA); + /// Ordered list type to hold accesses. using MemoryAccessList = std::forward_list; @@ -1363,6 +1367,11 @@ /// be eliminated too. void removeMemoryAccess(MemoryAccess *MA); + /// Remove @p MA from this statement.
+ /// + /// In contrast to removeMemoryAccess(), no other access will be eliminated. + void removeSingleMemoryAccess(MemoryAccess *MA); + typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; Index: include/polly/Support/GICHelper.h =================================================================== --- include/polly/Support/GICHelper.h +++ include/polly/Support/GICHelper.h @@ -18,6 +18,7 @@ #include "llvm/Support/raw_ostream.h" #include "isl/aff.h" #include "isl/ctx.h" +#include "isl/id.h" #include "isl/map.h" #include "isl/options.h" #include "isl/set.h" @@ -29,6 +30,8 @@ struct isl_schedule; struct isl_multi_aff; +char *isl_id_to_str(__isl_keep isl_id *mat); + namespace llvm { class Value; } // namespace llvm @@ -207,6 +210,7 @@ } \ }; +DECLARE_TRAITS(id) DECLARE_TRAITS(val) DECLARE_TRAITS(space) DECLARE_TRAITS(basic_map) @@ -216,6 +220,7 @@ DECLARE_TRAITS(set) DECLARE_TRAITS(union_set) DECLARE_TRAITS(aff) +DECLARE_TRAITS(multi_aff) DECLARE_TRAITS(pw_aff) DECLARE_TRAITS(union_pw_aff) DECLARE_TRAITS(multi_union_pw_aff) @@ -363,6 +368,12 @@ return OS; } +void foreachElt(NonowningIslPtr Map, + const std::function)> &F); + +void foreachElt(NonowningIslPtr Set, + const std::function)> &F); + /// Enumerate all isl_maps of an isl_union_map. /// /// This basically wraps isl_union_map_foreach_map() and allows to call back @@ -370,6 +381,9 @@ void foreachElt(NonowningIslPtr UMap, const std::function Map)> &F); +void foreachElt(NonowningIslPtr USet, + const std::function Set)> &F); + /// Enumerate all isl_pw_aff of an isl_union_pw_aff. 
/// /// This basically wraps isl_union_pw_aff(), but also allows to call back C++11 Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -107,7 +107,7 @@ static cl::opt UnprofitableScalarAccs( "polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), - cl::Hidden, cl::init(true), cl::cat(PollyCategory)); + cl::Hidden, cl::init(false), cl::cat(PollyCategory)); //===----------------------------------------------------------------------===// @@ -917,6 +917,68 @@ return OS; } +llvm::raw_ostream &polly::operator<<(llvm::raw_ostream &OS, + const ScopArrayInfo &SAI) { + OS << "ScopArrayInfo"; + return OS; +} + +llvm::raw_ostream &polly::operator<<(llvm::raw_ostream &OS, + const MemoryAccess &MA) { + OS << MA.getStatement()->getBaseName(); + + auto OrigKind = MA.getOriginalKind(); + switch (OrigKind) { + case ScopArrayInfo::MK_Value: + OS << " MK_Value"; + if (MA.isWrite()) { + OS << " Define " << MA.getScopArrayInfo()->getName() << " as "; + MA.getAccessValue()->printAsOperand(OS, false); + } else { + OS << " Use " << MA.getScopArrayInfo()->getName(); + } + break; + case ScopArrayInfo::MK_PHI: + case ScopArrayInfo::MK_ExitPHI: + OS << (OrigKind == ScopArrayInfo::MK_ExitPHI ? 
" MK_ExitPHI" : " MK_PHI"); + if (MA.isWrite()) { + OS << " Incoming " << MA.getScopArrayInfo()->getName() << " value "; + bool First = true; + for (auto Incoming : MA.getIncoming()) { + if (!First) + OS << " or "; + Incoming.second->printAsOperand(OS, false); + First = false; + } + } else { + OS << " Merge " << MA.getScopArrayInfo()->getName() << " as "; + MA.getAccessInstruction()->printAsOperand(OS, false); + } + break; + case ScopArrayInfo::MK_Array: + OS << " MK_Array"; + if (MA.isWrite()) { + OS << " Store "; + MA.getAccessValue()->printAsOperand(OS, false); + OS << " to " << give(MA.getAccessRelation()); + } else { + OS << " Load "; + MA.getAccessInstruction()->printAsOperand(OS, false); + OS << " from " << give(MA.getAccessRelation()); + } + break; + } + + if (MA.hasNewAccessRelation()) { + assert(MA.isLatestArrayKind()); + if (MA.isWrite()) + OS << " [new: " << give(MA.getAccessRelation()) << "]"; + else + OS << " [new: " << give(MA.getAccessRelation()) << "]"; + } + return OS; +} + void MemoryAccess::print(raw_ostream &OS) const { switch (AccType) { case READ: @@ -1028,15 +1090,14 @@ void MemoryAccess::setNewAccessRelation(__isl_take isl_map *NewAccess) { assert(NewAccess); + auto *OriginalDomainSpace = getStatement()->getDomainSpace(); #ifndef NDEBUG // Check domain space compatibility. auto *NewSpace = isl_map_get_space(NewAccess); auto *NewDomainSpace = isl_space_domain(isl_space_copy(NewSpace)); - auto *OriginalDomainSpace = getStatement()->getDomainSpace(); assert(isl_space_has_equal_tuples(OriginalDomainSpace, NewDomainSpace)); isl_space_free(NewDomainSpace); - isl_space_free(OriginalDomainSpace); // Check whether there is an access for every statement instance. 
auto *StmtDomain = getStatement()->getDomain(); @@ -1066,7 +1127,7 @@ #endif isl_map_free(NewAccessRelation); - NewAccessRelation = NewAccess; + NewAccessRelation = isl_map_align_params(NewAccess, OriginalDomainSpace); } //===----------------------------------------------------------------------===// @@ -1718,6 +1779,19 @@ InstructionToAccess.erase(MA->getAccessInstruction()); } +void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) { + auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA); + assert(MAIt != MemAccs.end()); + MemAccs.erase(MAIt); + + auto It = InstructionToAccess.find(MA->getAccessInstruction()); + if (It != InstructionToAccess.end()) { + It->second.remove(MA); + if (It->second.empty()) + InstructionToAccess.erase(MA->getAccessInstruction()); + } +} + //===----------------------------------------------------------------------===// /// Scop class implement Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -57,6 +57,7 @@ Transform/ScheduleOptimizer.cpp Transform/FlattenSchedule.cpp Transform/FlattenAlgo.cpp + Transform/DeLICM.cpp ${POLLY_HEADER_FILES} ) Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -467,7 +467,7 @@ DT.dominates(cast(Address)->getParent(), Builder.GetInsertBlock())) && "Domination violation"); - BBMap[MA->getBaseAddr()] = + BBMap[MA->getAccessValue()] = Builder.CreateLoad(Address, Address->getName() + ".reload"); } } Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -724,6 +724,8 @@ "Generating new index expressions to indirect arrays not working"); auto Schedule = isl_ast_build_get_schedule(Build); + assert(!MA->getLatestScopArrayInfo()->getBasePtrOriginSAI() && + "Indirect 
access codegen not supported"); auto PWAccRel = MA->applyScheduleToAccessRelation(Schedule); auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); Index: lib/Support/GICHelper.cpp =================================================================== --- lib/Support/GICHelper.cpp +++ lib/Support/GICHelper.cpp @@ -177,6 +177,7 @@ replace(str, "\"", "_"); replace(str, " ", "__"); replace(str, "=>", "TO"); + replace(str, "+", "_"); } std::string polly::getIslCompatibleName(const std::string &Prefix, @@ -217,12 +218,41 @@ DEFINE_ISLPTR(set) DEFINE_ISLPTR(union_set) DEFINE_ISLPTR(aff) +DEFINE_ISLPTR(multi_aff) DEFINE_ISLPTR(pw_aff) // DEFINE_ISLPTR(union_pw_aff) /* There is no isl_union_pw_aff_dump() */ DEFINE_ISLPTR(multi_union_pw_aff) DEFINE_ISLPTR(union_pw_multi_aff) } +void polly::foreachElt(NonowningIslPtr Map, + const std::function)> &F) { + isl_map_foreach_basic_map( + Map.keep(), + [](__isl_take isl_basic_map *BMap, void *User) -> isl_stat { + auto &F = + *static_cast)> *>( + User); + F(give(BMap)); + return isl_stat_ok; + }, + const_cast(static_cast(&F))); +} + +void polly::foreachElt(NonowningIslPtr Set, + const std::function)> &F) { + isl_set_foreach_basic_set( + Set.keep(), + [](__isl_take isl_basic_set *BSet, void *User) -> isl_stat { + auto &F = + *static_cast)> *>( + User); + F(give(BSet)); + return isl_stat_ok; + }, + const_cast(static_cast(&F))); +} + void polly::foreachElt(NonowningIslPtr UMap, const std::function Map)> &F) { isl_union_map_foreach_map( @@ -236,6 +266,19 @@ const_cast(static_cast(&F))); } +void polly::foreachElt(NonowningIslPtr USet, + const std::function Set)> &F) { + isl_union_set_foreach_set( + USet.keep(), + [](__isl_take isl_set *Set, void *User) -> isl_stat { + auto &F = + *static_cast)> *>(User); + F(give(Set)); + return isl_stat_ok; + }, + const_cast(static_cast(&F))); +} + void polly::foreachElt(NonowningIslPtr UPwAff, const std::function)> &F) { isl_union_pw_aff_foreach_pw_aff( Index: 
lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -23,6 +23,7 @@ #include "polly/Canonicalization.h" #include "polly/CodeGen/CodeGeneration.h" #include "polly/CodeGen/CodegenCleanup.h" +#include "polly/DeLICM.h" #include "polly/DependenceInfo.h" #include "polly/FlattenSchedule.h" #include "polly/LinkAllPasses.h" @@ -159,6 +160,11 @@ cl::desc("Enable polyhedral interface of Polly"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +static cl::opt + EnableDeLICM("polly-enable-delicm", + cl::desc("Eliminate scalar loop carried dependences"), + cl::Hidden, cl::init(true), cl::cat(PollyCategory)); + namespace polly { void initializePollyPasses(PassRegistry &Registry) { initializeCodeGenerationPass(Registry); @@ -181,6 +187,7 @@ initializeScopInfoWrapperPassPass(Registry); initializeCodegenCleanupPass(Registry); initializeFlattenSchedulePass(Registry); + initializeDeLICMPass(Registry); } /// Register Polly passes such that they form a polyhedral optimizer. @@ -228,6 +235,9 @@ if (EnablePolyhedralInfo) PM.add(polly::createPolyhedralInfoPass()); + if (EnableDeLICM) + PM.add(polly::createDeLICMPass()); + if (ImportJScop) PM.add(polly::createJSONImporterPass()); Index: lib/Transform/DeLICM.cpp =================================================================== --- /dev/null +++ lib/Transform/DeLICM.cpp @@ -0,0 +1,2817 @@ +//===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Undo the effect of Loop Invariant Code Motion (LICM) and +// GVN Partial Redundancy Elimination (PRE) on SCoP-level. 
+// +// Namely, remove register/scalar dependencies by mapping them back to array +// elements. +// +// The algorithms here work on the scatter space - the image space of the +// schedule returned by Scop::getSchedule(). We call an element in that space a +// "timepoint". Timepoints are lexicographically ordered such that we can +// define ranges in the scatter space. We use two flavors of such ranges: +// Timepoint sets and zones. A timepoint set is simply a subset of the scatter +// space and is directly stored as isl_set. +// +// Zones are used to describe the space between timepoints as open sets, ie. +// they +// do not contain the extrema. Using ISL rational sets to express these would be +// overkill. We also cannot store them as the integer timepoints they contain; +// the (nonempty) zone between 1 and 2 would be empty and not differentiable +// from eg. the zone between 3 and 4. Also, we cannot store the integer set +// including the extrema; the set ]1,2[ + ]3,4[ could be coalesced to ]1,4[, +// although we defined the range [2,3] to be not in the set. Instead, we store +// the "half-open" integer extrema, excluding the lower bound, but including the +// upper bound. Examples: +// +// * The set { [i] : 1 <= i <= 3 } represents the zone ]0,3[ (which contains the +// integer points 1 and 2) +// +// * { [1] } represents the zone ]0,1[ +// +// * { [i] : i = 1 or i = 3 } represents the zone ]0,1[ + ]2,3[ +// +// Therefore, an integer i in the set represents the zone ]i-1,i[, ie. strictly +// speaking the integer points never belong to the zone. However, depending on +// the interpretation, one might want to include them: +// +// * The timepoints adjacent to two unit zones: zoneToInsideInstances() +// +// * The timepoints before a unit zone begins: +// shiftDim(<Zone>, isl_dim_in, -1, 1) +// +// * The timepoints that directly follow a unit zone: Reinterpret the zone as +// a set of timepoints.
+// +// It sometimes helps think about a value of i stored in an isl_set to represent +// the timepoint i-0.5 between two integer-valued timepoints. +// @see zoneToInsideInstances() +// +// +// The code makes use of maps and sets in many different spaces. To not loose +// track in which space a set or map is expected to be in, variables holding an +// ISL reference are usually annotated in the comments. They roughly follow ISL +// syntax for spaces, but only the tuples, not the dimensions. The tuples have a +// meaning as follows: +// +// * Space[] - An unspecified tuple. Used for function parameters such that the +// function caller can use it for anything they like. +// +// * Domain[] - A statement instance as returned by ScopStmt::getDomain() +// isl_id_get_name: Stmt_ +// isl_id_get_user: Pointer to ScopStmt +// +// * Element[] - An array element as in the range part of +// MemoryAccess::getAccessRelation() +// isl_id_get_name: MemRef_ +// isl_id_get_user: Pointer to ScopArrayInfo +// +// * Scatter[] - Scatter space or space of timepoints +// Has no tuple id +// +// * Zone[] - Range between timepoints as described above +// Has no tuple id +// +// * ValInst[] - An llvm::Value as defined at a specific timepoint. +// A ValInst[] itself can be structured as one of: +// * [] - An unknown value +// Always zero dimensions +// Has no tuple id +// * Undef[] - Value is not used +// Always zero dimensions +// isl_id_get_name: Undef +// isl_id_get_user: A pointer to an llvm::UndefValue +// * Value[] - An llvm::Value that is read-only in the Scop, ie. its +// runtime content does not depend on the timepoint. +// Always zero dimensions +// isl_id_get_name: Val_ +// isl_id_get_user: A pointer to an llvm::Value +// * [Domain[] -> Value[]] - An llvm::Value that may change during the +// Scop's execution +// The tuple itself has no id, but it wraps a map space holding a +// statement instance which defines the llvm::Value as the map's domain +// and llvm::Value itself as range. 
+// @see makeValInst() +// +// An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a +// statement instance to a timepoint, aka. a schedule. There is only one scatter +// space, but most of the time multiple statements are processed in one set. +// This is why most of the time isl_union_map has to be used. +// +//===----------------------------------------------------------------------===// + +#include "polly/DeLICM.h" +#include "polly/Options.h" +#include "polly/ScopInfo.h" +#include "polly/ScopPass.h" +#include "polly/Support/GICHelper.h" +#include "llvm/ADT/Statistic.h" +#include "isl/aff.h" +#include "isl/aff_type.h" +#include "isl/map.h" +#include "isl/options.h" +#include "isl/printer.h" +#include "isl/union_map.h" +#include "isl/union_set.h" +#include +#include +#define DEBUG_TYPE "polly-delicm" + +using namespace polly; +using namespace llvm; + +namespace { + +cl::opt + DelicmMaxOps("polly-delicm-max-ops", + cl::desc("Maximum number of ISL operations to invest for " + "lifetime analysis; 0=no limit"), + cl::init(1000000), cl::cat(PollyCategory)); + +STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); +STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); +STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); + +/// Return the range elements that are lexicographically smaller. +/// +/// @param Map { Space[] -> Scatter[] } +/// @param Strict True for strictly lexicographically smaller elements. (exclude +/// same timepoints from the result) +/// +/// @return { Space[] -> Scatter[] } +/// A map to all timepoints that happen before the timepoints the input +/// mapped to. +IslPtr beforeScatter(IslPtr Map, bool Strict) { + auto RangeSpace = give(isl_space_range(isl_map_get_space(Map.keep()))); + auto ScatterRel = give(Strict ? 
isl_map_lex_gt(RangeSpace.take()) + : isl_map_lex_ge(RangeSpace.take())); + return give(isl_map_apply_range(Map.take(), ScatterRel.take())); +} + +/// Piecewise beforeScatter(IslPtr,bool). +IslPtr beforeScatter(IslPtr UMap, bool Strict) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto After = beforeScatter(Map, Strict); + Result = give(isl_union_map_add_map(Result.take(), After.take())); + }); + return Result; +} + +/// Return the range elements that are lexicographically larger. +/// +/// @param Map { Space[] -> Scatter[] } +/// @param Strict True for strictly lexicographically larger elements. (exclude +/// same timepoints from the result) +/// +/// @return { Space[] -> Scatter[] } +/// A map to all timepoints that happen after the timepoints the input +/// map originally mapped to. +IslPtr afterScatter(IslPtr Map, bool Strict) { + auto RangeSpace = give(isl_space_range(isl_map_get_space(Map.keep()))); + auto ScatterRel = give(Strict ? isl_map_lex_lt(RangeSpace.take()) + : isl_map_lex_le(RangeSpace.take())); + return give(isl_map_apply_range(Map.take(), ScatterRel.take())); +} + +/// Piecewise afterScatter(IslPtr,bool). +IslPtr afterScatter(NonowningIslPtr UMap, + bool Strict) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto After = afterScatter(Map, Strict); + Result = give(isl_union_map_add_map(Result.take(), After.take())); + }); + return Result; +} + +/// Construct a range of timepoints between two timepoints. +/// +/// Example: +/// From := { A[] -> [0]; B[] -> [0] } +/// To := { B[] -> [10]; C[] -> [20] } +/// +/// Result: +/// { B[] -> [i] : 0 < i < 10 } +/// +/// Note that A[] and C[] are not in the result because they do not have a start +/// or end timepoint. If a start (or end) timepoint is not unique, the first +/// (respectively last) is chosen. 
+/// +/// @param From { Space[] -> Scatter[] } +/// Map to start timepoints. +/// @param To { Space[] -> Scatter[] } +/// Map to end timepoints. +/// @param InclFrom Whether to include the start timepoints to the result. In +/// the example, this would add { B[] -> [0] } +/// @param InclTo Whether to include the end timepoints to the result. In this +/// example, this would add { B[] -> [10] } +/// +/// @return { Space[] -> Scatter[] } +/// A map for each domain element of timepoints between two extreme +/// points, or nullptr if @p From or @p To is nullptr, or the ISL max +/// operations count is exceeded. +IslPtr betweenScatter(IslPtr From, IslPtr To, + bool InclFrom, bool InclTo) { + auto AfterFrom = afterScatter(From, !InclFrom); + auto BeforeTo = beforeScatter(To, !InclTo); + + return give(isl_map_intersect(AfterFrom.take(), BeforeTo.take())); +} + +/// Piecewise betweenScatter(IslPtr,IslPtr,bool,bool). +IslPtr betweenScatter(IslPtr From, + IslPtr To, bool IncludeFrom, + bool IncludeTo) { + auto AfterFrom = afterScatter(From, !IncludeFrom); + auto BeforeTo = beforeScatter(To, !IncludeTo); + + return give(isl_union_map_intersect(AfterFrom.take(), BeforeTo.take())); +} + +/// If by construction a union map is known to contain only a single map, return +/// it. +/// +/// This function combines isl_map_from_union_map() and +/// isl_union_map_extract_map(). isl_map_from_union_map() fails if the map is +/// empty because it does not know which space it would be in. +/// isl_union_map_extract_map() on the other hand does not check whether there +/// is (at most) one isl_map in the union, ie. how it has been constructed is +/// probably wrong.
+IslPtr singleton(IslPtr UMap, + IslPtr ExpectedSpace) { + if (!UMap) + return nullptr; + + if (isl_union_map_n_map(UMap.keep()) == 0) + return give(isl_map_empty(ExpectedSpace.take())); + + auto Result = give(isl_map_from_union_map(UMap.take())); + assert( + !Result || + isl_space_has_equal_tuples(give(isl_map_get_space(Result.keep())).keep(), + ExpectedSpace.keep()) == isl_bool_true); + return Result; +} + +/// If by construction an isl_union_set is known to contain only a single +/// isl_set, return it. +/// +/// This function combines isl_set_from_union_set() and +/// isl_union_set_extract_set(). isl_map_from_union_set() fails if the set is +/// empty because it doesn't not know which space it would be in. +/// isl_union_set_extract_set() on the other hand does not check whether there +/// is (at most) one isl_set in the union, ie. how it has been constructed is +/// probably wrong. +IslPtr singleton(IslPtr USet, + IslPtr ExpectedSpace) { + if (!USet) + return nullptr; + + if (isl_union_set_n_set(USet.keep()) == 0) + return give(isl_set_empty(ExpectedSpace.copy())); + + auto Result = give(isl_set_from_union_set(USet.take())); + assert( + !Result || + isl_space_has_equal_tuples(give(isl_set_get_space(Result.keep())).keep(), + ExpectedSpace.keep()) == isl_bool_true); + return Result; +} + +/// Returns whether @p Map has a mapping for at least all elements of @p Domain. +isl_bool isMapDomainSubsetOf(IslPtr Map, + NonowningIslPtr Domain) { + auto Subset = give(isl_map_domain(Map.take())); + return isl_set_is_subset(Subset.keep(), Domain.keep()); +} + +/// Determine how many dimensions the scatter space of @p Schedule has. +/// +/// The schedule must not be empty and have equal number of dimensions of any +/// subspace it contains. +/// +/// The implementation currently returns the maximum number of dimensions it +/// encounters, if different, and 0 if none is encountered. However, most other +/// code will most likely fail if one of these happen. 
+unsigned getNumScatterDims(NonowningIslPtr Schedule) { + unsigned Dims = 0; + foreachElt(Schedule, [=, &Dims](IslPtr Map) { + Dims = std::max(Dims, isl_map_dim(Map.keep(), isl_dim_out)); + }); + return Dims; +} + +/// Return the scatter space of a @p Schedule. +/// +/// This is basically the range space of the schedule map, but harder to +/// determine because it is an isl_union_map. +IslPtr getScatterSpace(NonowningIslPtr Schedule) { + if (!Schedule) + return nullptr; + auto Dims = getNumScatterDims(Schedule); + auto ScatterSpace = + give(isl_space_set_from_params(isl_union_map_get_space(Schedule.keep()))); + return give(isl_space_add_dims(ScatterSpace.take(), isl_dim_set, Dims)); +} + +/// Construct an identity map for the given domain values. +/// +/// There is no type resembling isl_union_space, hence we have to pass an +/// isl_union_set as the map's domain and range space. +/// +/// @param USet { Space[] } +/// The returned map's domain and range. +/// @param RestrictDomain If true, the returned map only maps elements contained +/// in @p USet and no other. If false, it returns an +/// overapproximation with the identity maps of any space +/// in @p USet, not just the elements in it. +/// +/// @return { Space[] -> Space[] } +/// A map that maps each value of @p USet to itself. +IslPtr makeIdentityMap(NonowningIslPtr USet, + bool RestrictDomain) { + auto Result = give(isl_union_map_empty(isl_union_set_get_space(USet.keep()))); + foreachElt(USet, [=, &Result](IslPtr Set) { + auto IdentityMap = give(isl_map_identity( + isl_space_map_from_set(isl_set_get_space(Set.keep())))); + if (RestrictDomain) + IdentityMap = + give(isl_map_intersect_domain(IdentityMap.take(), Set.take())); + Result = give(isl_union_map_add_map(Result.take(), IdentityMap.take())); + }); + return Result; +} + +/// Constructs a map that swaps two nested tuples. 
+/// +/// @param FromSpace1 { Space1[] } +/// @param FromSpace2 { Space2[] } +/// +/// @result { [Space1[] -> Space2[]] -> [Space2[] -> Space1[]] } +IslPtr makeTupleSwapBasicMap(IslPtr FromSpace1, + IslPtr FromSpace2) { + assert(isl_space_is_set(FromSpace1.keep()) != isl_bool_false); + assert(isl_space_is_set(FromSpace2.keep()) != isl_bool_false); + + auto Dims1 = isl_space_dim(FromSpace1.keep(), isl_dim_set); + auto Dims2 = isl_space_dim(FromSpace2.keep(), isl_dim_set); + auto FromSpace = give(isl_space_wrap(isl_space_map_from_domain_and_range( + FromSpace1.copy(), FromSpace2.copy()))); + auto ToSpace = give(isl_space_wrap(isl_space_map_from_domain_and_range( + FromSpace2.take(), FromSpace1.take()))); + auto MapSpace = give( + isl_space_map_from_domain_and_range(FromSpace.take(), ToSpace.take())); + + auto Result = give(isl_basic_map_universe(MapSpace.take())); + for (auto i = Dims1 - Dims1; i < Dims1; i += 1) { + Result = give(isl_basic_map_equate(Result.take(), isl_dim_in, i, + isl_dim_out, Dims2 + i)); + } + for (auto i = Dims2 - Dims2; i < Dims2; i += 1) { + Result = give(isl_basic_map_equate(Result.take(), isl_dim_in, Dims1 + i, + isl_dim_out, i)); + } + + return Result; +} + +/// Like makeTupleSwapBasicMap(IslPtr,IslPtr), but returns +/// an isl_map. +IslPtr makeTupleSwapMap(IslPtr FromSpace1, + IslPtr FromSpace2) { + auto BMapResult = + makeTupleSwapBasicMap(std::move(FromSpace1), std::move(FromSpace2)); + return give(isl_map_from_basic_map(BMapResult.take())); +} + +/// Reverse the nested map tuple in @p Map's domain. 
+/// +/// @param Map { [Space1[] -> Space2[]] -> Space3[] } +/// +/// @return { [Space2[] -> Space1[]] -> Space3[] } +IslPtr reverseDomain(IslPtr Map) { + auto DomSpace = + give(isl_space_unwrap(isl_space_domain(isl_map_get_space(Map.keep())))); + auto Space1 = give(isl_space_domain(DomSpace.copy())); + auto Space2 = give(isl_space_range(DomSpace.take())); + auto Swap = makeTupleSwapMap(std::move(Space1), std::move(Space2)); + return give(isl_map_apply_domain(Map.take(), Swap.take())); +} + +/// Piecewise reverseDomain(IslPtr). +IslPtr reverseDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto Reversed = reverseDomain(std::move(Map)); + Result = give(isl_union_map_add_map(Result.take(), Reversed.take())); + }); + return Result; +} + +/// Compute the next overwrite for a scalar. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all writes. Instances not in @p +/// Writes will be ignored. +/// @param Writes { DomainWrite[] } +/// The element instances that write to the scalar. +/// @param InclPrevWrite Whether to extend an overwrite timepoints to include +/// the timepoint where the previous write happens. +/// @param InclOverwrite Whether the reaching overwrite includes the timepoint +/// of the overwrite itself. +/// +/// @return { Scatter[] -> DomainDef[] } +IslPtr +computeScalarReachingOverwrite(IslPtr Schedule, + IslPtr Writes, bool InclPrevWrite, + bool InclOverwrite) { + + // { DomainWrite[] } + auto WritesMap = give(isl_union_map_from_domain(Writes.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto Result = computeReachingOverwrite( + std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); + + return give(isl_union_map_domain_factor_range(Result.take())); +} + +/// Overload of computeScalarReachingOverwrite, with only one writing statement. 
+/// Consequently, the result consists of only one map space. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclPrevWrite Include the previous write to result. +/// @param InclOverwrite Include the overwrite to the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr computeScalarReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, + bool InclOverwrite) { + auto ScatterSpace = getScatterSpace(Schedule); + auto DomSpace = give(isl_set_get_space(Writes.keep())); + + auto ReachOverwrite = computeScalarReachingOverwrite( + Schedule, give(isl_union_set_from_set(Writes.take())), InclPrevWrite, + InclOverwrite); + + auto ResultSpace = give(isl_space_map_from_domain_and_range( + ScatterSpace.take(), DomSpace.take())); + return singleton(std::move(ReachOverwrite), ResultSpace); +} + +// Just a wrapper around computeScalarReachingDefinition if there is just +// one element. +// { Scatter[] -> Domain[] } + +/// Compute the reaching definition of a scalar. +/// +/// Compared to computeReachingDefinition, there is just one element which is +/// accessed an therefore doesn't need to be specified. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclDef Include the timepoint of the definition to the result. +/// @param InclRedef Include the timepoint of the overwrite into the result. 
+/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr +computeScalarReachingDefinition(IslPtr Schedule, + IslPtr Writes, bool InclDef, + bool InclRedef) { + + // { DomainWrite[] -> Element[] } + auto Defs = give(isl_union_map_from_domain(Writes.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReachDefs = + computeReachingDefinition(Schedule, Defs, InclDef, InclRedef); + + // { Scatter[] -> DomainWrite[] } + return give(isl_union_set_unwrap( + isl_union_map_range(isl_union_map_curry(ReachDefs.take())))); +} + +/// Compute the reaching definition of a scalar. +/// +/// This overload accepts only a single writing statement as an isl_map, +/// consequently the result also is only a single isl_map. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclDef Include the timepoint of the definition to the result. +/// @param InclRedef Include the timepoint of the overwrite into the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr computeScalarReachingDefinition( // { Domain[] -> Zone[] } + IslPtr Schedule, IslPtr Writes, bool InclDef, + bool InclRedef) { + auto DomainSpace = give(isl_set_get_space(Writes.keep())); + auto ScatterSpace = getScatterSpace(Schedule); + + // { Scatter[] -> DomainWrite[] } + auto UMap = computeScalarReachingDefinition( + Schedule, give(isl_union_set_from_set(Writes.take())), InclDef, + InclRedef); + + auto ResultSpace = give(isl_space_map_from_domain_and_range( + ScatterSpace.take(), DomainSpace.take())); + return singleton(UMap, ResultSpace); +} + +/// Create a map that shifts one dimension by an offset. +/// +/// Example: +/// makeShiftDimAff({ [i0, i1] -> [o0, o1] }, 1, -2) +/// = { [i0, i1] -> [i0, i1 - 1] } +/// +/// @param Space The map space of the result. Must have equal number of in- and +/// out-dimensions. +/// @param Pos Position to shift. +/// @param Amount Value added to the shifted dimension. 
+/// +/// @return An isl_multi_aff for the map with this shifted dimension. +IslPtr makeShiftDimAff(IslPtr Space, int Pos, + int Amount) { + auto Identity = give(isl_multi_aff_identity(Space.take())); + if (Amount == 0) + return Identity; + auto ShiftAff = give(isl_multi_aff_get_aff(Identity.keep(), Pos)); + ShiftAff = give(isl_aff_set_constant_si(ShiftAff.take(), Amount)); + return give(isl_multi_aff_set_aff(Identity.take(), Pos, ShiftAff.take())); +} + +/// Add a constant to one dimension of a map. +/// +/// @param Map The map to shift a dimension in. +/// @param Type A tuple of @p Map which contains the dimension to shift. +/// @param Pos The dimension to shift. If negative, the dimensions are +/// counted from the end instead from the beginning. Eg. -1 is the last +/// dimension in the tuple. +/// @param Amount The offset to add to the specified dimension. +/// +/// @return The modified map. +IslPtr shiftDim(IslPtr Map, isl_dim_type Type, int Pos, + int Amount) { + assert((Type == isl_dim_in || Type == isl_dim_out) && + "Cannot shift parameter dims"); + int NumDims = isl_map_dim(Map.keep(), Type); + if (Pos < 0) + Pos = NumDims + Pos; + assert(Pos < NumDims && "Dimension index must be in range"); + auto Space = give(isl_map_get_space(Map.keep())); + Space = give((Type == isl_dim_in) ? isl_space_domain(Space.take()) + : isl_space_range(Space.take())); + Space = give(isl_space_map_from_domain_and_range(Space.copy(), Space.copy())); + auto Translator = makeShiftDimAff(std::move(Space), Pos, Amount); + auto TranslatorMap = give(isl_map_from_multi_aff(Translator.take())); + return give((Type == isl_dim_in) + ? isl_map_apply_domain(Map.take(), TranslatorMap.take()) + : isl_map_apply_range(Map.take(), TranslatorMap.take())); +} + +/// Add a constant to one dimension of a each map in a union map. +/// +/// @param UMap The maps to shift a dimension in. +/// @param Type The tuple which contains the dimension to shift. +/// @param Pos The dimension to shift. 
If negative, the dimensions are +/// counted from the ends of each map of union instead from their beginning. Eg. +/// -1 is the last dimension of any map. +/// @param Amount The offset to add to the specified dimension. +/// +/// @return The union of all modified maps. +IslPtr shiftDim(IslPtr UMap, isl_dim_type Type, + int Pos, int Amount) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto Shifted = shiftDim(Map, Type, Pos, Amount); + Result = give(isl_union_map_add_map(Result.take(), Shifted.take())); + }); + return Result; +} + +/// Compute the lifetime of a scalar. +/// +/// In contrast to computeArrayLifetime, only considers just one variable +/// instead of multiple arrays. Hence, the element is not required for input and +/// is not part of the output. +/// +/// @param Schedule { Domain[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param Reads { DomainRead[] } +/// @param ReadEltInSameInst A read at the same timepoint as a write will read +/// that value instead of the previous's. +/// @param InclWrite Include the write itself to the lifetime. +/// @param InclLastRead Include the last write to the lifetime. +/// @param ExitReads Extend the last definition to infinity. +/// +/// @return { DomainWrite[] -> Zone[] } +/// The scalar's lifetime. 
+/// +/// @see computeArrayLifetime +IslPtr computeScalarLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclWrite, bool InclLastRead, + bool ExitReads) { + + // { DomainWrite[] -> Element[] } + auto WritesElt = give(isl_union_map_from_domain(Writes.take())); + + // { DomainRead[] -> Element[] } + auto ReadsElt = give(isl_union_map_from_domain(Reads.take())); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto Result = + computeArrayLifetime(Schedule, WritesElt, ReadsElt, ReadEltInSameInst, + InclWrite, InclLastRead, ExitReads); + + return give(isl_union_map_domain_factor_range(Result.take())); +} + +/// Compute the lifetime of a scalar with a single definition. +/// +/// Because there is only one defining statement, takes only an isl_map for +/// writes and only returns an isl_map. +/// +/// @param Schedule { Domain[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param Reads { DomainRead[] } +/// @param ReadEltInSameInst A read at the same timepoint as a write will read +/// that value instead of the previous's. +/// @param InclWrite Include the write itself to the lifetime. +/// @param InclLastRead Include the last write to the lifetime. +/// @param ExitReads Extend the last definition to infinity. +/// +/// @return { DomainWrite[] -> Zone[] } +/// The scalar's lifetime. +/// +/// @see computeArrayLifetime +IslPtr computeScalarLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, bool InclWrite, + bool InclLastRead, bool ExitsReads) { + return give(isl_map_from_union_map( + computeScalarLifetime( + Schedule, give(isl_union_set_from_set(Writes.take())), Reads, + ReadEltInSameInst, InclWrite, InclLastRead, ExitsReads) + .take())); +} + +/// Simplify the a map inplace. 
+void simplify(IslPtr &Map) { + Map = give(isl_map_compute_divs(Map.take())); + Map = give(isl_map_detect_equalities(Map.take())); + Map = give(isl_map_coalesce(Map.take())); +} + +/// Simplify the a union map inplace. +void simplify(IslPtr &Map) { + Map = give(isl_union_map_compute_divs(Map.take())); + Map = give(isl_union_map_detect_equalities(Map.take())); + Map = give(isl_union_map_coalesce(Map.take())); +} + +/// If InputVal is not defined in the stmt itself, return the MemoryAccess that +/// reads the scalar. Return nullptr otherwise (if the value is defined in the +/// scop, or is synthesizable) +MemoryAccess *getInputAccessOf(Value *InputVal, ScopStmt *Stmt) { + for (auto *MA : *Stmt) { + if (!MA->isRead()) + continue; + if (!MA->isLatestScalarKind()) + continue; + + assert(MA->getAccessValue() == MA->getBaseAddr()); + if (MA->getAccessValue() == InputVal) + return MA; + } + return nullptr; +} + +/// Try to find a 'natural' extension of a mapped to elements outside its +/// domain. +/// +/// @param Relevant The map with mapping that may not be modified. +/// @param Universe The domain to which @p Relevant needs to be extended. +/// +/// @return A map with that associates the domain elements of @p Relevant to the +/// same elements and in addition the elements of @p Universe to some undefined +/// elements. The function prefers to return simple maps. +IslPtr expandMapping(IslPtr Relevant, + IslPtr Universe) { + Relevant = give(isl_union_map_coalesce(Relevant.take())); + auto RelevantDomain = give(isl_union_map_domain(Relevant.copy())); + auto Simplified = + give(isl_union_map_gist_domain(Relevant.take(), RelevantDomain.take())); + Simplified = give(isl_union_map_coalesce(Simplified.take())); + return give( + isl_union_map_intersect_domain(Simplified.take(), Universe.take())); +} + +/// Determine whether an access touches at most one element. +/// +/// The accessed element could be a scalar or accessing an array with constant +/// subscript, st. 
all instances access only that element. +/// +/// @param Map { Domain[] -> Element[] } +/// The access's access relation. +/// +/// @return True, if zero or one elements are accessed; False if at least two +/// different elements are accessed. +bool isScalarAccess(IslPtr Map) { + auto Set = give(isl_map_range(Map.take())); + return isl_set_is_singleton(Set.keep()); +} + +/// Return whether @p Map maps to llvm::Undef. +/// +/// @param Map { [] -> ValInst[] } +bool isMapToUndef(NonowningIslPtr Map) { + if (!isl_map_has_tuple_id(Map.keep(), isl_dim_out)) + return false; + + auto Id = give(isl_map_get_tuple_id(Map.keep(), isl_dim_out)); + auto Val = static_cast(isl_id_get_user(Id.keep())); + return Val && isa(Val); +} + +/// Return whether @p Map maps to an unknown value. +/// +/// @param { [] -> ValInst[] } +bool isMapToUnknown(NonowningIslPtr Map) { + auto Space = give(isl_space_range(isl_map_get_space(Map.keep()))); + return !isl_map_has_tuple_id(Map.keep(), isl_dim_set) && + !isl_space_is_wrapping(Space.keep()); +} + +/// Remove unknown values from the mapping, leaving only mappings to +/// llvm::Value's and llvm::Undef. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +IslPtr +removeUnknownValInst(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (!isMapToUnknown(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; +} + +/// Return the domain of everything that maps to an unknown value. 
+/// +/// @param UMap { Domain[] -> ValInst[] } +/// +/// @return { Domain[] } +IslPtr +getUnknownValInstDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_set_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (isMapToUnknown(Map)) + Result = give( + isl_union_set_add_set(Result.take(), isl_map_domain(Map.take()))); + }); + return Result; +} + +/// Return the domain of everything that maps to Undef. +/// +/// @param UMap { Domain[] -> ValInst[] } +/// +/// @return { Domain[] } +IslPtr +getUndefValInstDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_set_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (isMapToUndef(Map)) + Result = give( + isl_union_set_add_set(Result.take(), isl_map_domain(Map.take()))); + }); + return Result; +} + +/// Remove everything that maps to llvm::Undef. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +IslPtr removeUndefValInst(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (!isMapToUndef(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; +} + +/// Return only the mappings that map to known values. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +IslPtr filterKnownValInst(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (!isMapToUnknown(Map) && !isMapToUndef(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; +} + +/// Represent the knowledge of the contents any array elements in any zone or +/// the knowledge we would add when mapping a scalar to an array element. 
+/// +/// Every array element at every zone unit has one three states: +/// - Undef: Not occupied by any value so transformation can change it to other +/// values. +/// - Known: Contains an llvm::Value instance, the instance is stored as the +/// domain of the statement instance defining it. +/// - Unknown: The element contains a value, but it is not a determinable +/// llvm::Value instance. +/// +/// There are two uses for the Knowledge class: +/// 1) To represent the knowledge of the current state of ScopInfo. The Undef +/// state means that an element is currently unused: there is no read of it +/// before the next overwrite. Also called 'existing'. +/// 2) To represent the requirements for mapping a scalar to array elements. The +/// undef state means that there is no change/requirement. Also called +/// 'proposed'. +/// +/// In addition to the these states at unit zones, Knowledge need to know when +/// values are written. This is because written values may have no lifetime (one +/// reason is that the value is never read). Such writes would therefore never +/// conflict, but overwrite values that might still be required. Another source +/// of problems problem are multiple writes to the same element at the same +/// timepoint, because their order is undefined. Writes can either write know or +/// unknown states. An 'undef' write would a non-existing write. +class Knowledge { +private: + /// { [Element[] -> Zone[]] -> ValInst[] } + /// The state of every array element at every unit zone. + IslPtr Lifetime; + + /// An array element at any time has one of the three states. For efficiency, + /// one of them can be represented implicitly by assuming that state when it + /// maps to nothing. Which one is more efficient depends on the use case. + /// + /// If ImplicitLifetimeIsUnknown == false, unmapped zones are assumed to be + /// Unknown. This is more efficient for use case 1) because anything that + /// cannot be determined to be Known or Undef is Unknown. 
+ /// + /// If ImplicitLifetimeIsUnknown == true, unmapped zones are assumed to be + /// Undef. This is more efficient for use case 2) because scalar mapping only + /// constraints zones that are in scalar's lifetime. + bool ImplicitLifetimeIsUnknown = false; + + /// { [Element[] -> Scatter[]] -> ValInst[] } + /// The write actions currently in the scop or that would be added when + /// mapping a scalar. + IslPtr Written; + + /// Check whether this Knowledge object is well-formed. + void checkConsistency() const { + // Default-initialized object + if (!Lifetime && !Written) + return; + + assert(Lifetime); + assert(Written); + assert(isl_union_map_is_single_valued(Lifetime.keep()) == isl_bool_true); + } + + /// Ensure an unique representation depending on ImplicitLifetimeIsUnknown. + void canonicalize() { + if (isImplicitLifetimeUndef()) + Lifetime = removeUndefValInst(this->Lifetime); + if (isImplicitLifetimeUnknown()) + Lifetime = removeUnknownValInst(this->Lifetime); + } + + /// Accessor for ImplicitLifetimeIsUnknown. + bool isImplicitLifetimeUnknown() const { return ImplicitLifetimeIsUnknown; } + + /// Accessor for ImplicitLifetimeIsUnknown. + bool isImplicitLifetimeUndef() const { return !ImplicitLifetimeIsUnknown; } + +public: + /// Initialize an nullptr-Knowledge. This is only provided for convenience; do + /// not use such an object. + Knowledge() {} + + /// Create a new object with the given members. + Knowledge(IslPtr Lifetime, bool ImplicitLifetimeIsUnknown, + IslPtr Written) + : Lifetime(std::move(Lifetime)), + ImplicitLifetimeIsUnknown(ImplicitLifetimeIsUnknown), + Written(std::move(Written)) { + canonicalize(); + checkConsistency(); + } + + /// Alternative ctor taking isl_maps instead isl_union_map. 
+ Knowledge(IslPtr Lifetime, bool ImplicitLifetimeIsUnknown, + IslPtr Written) + : Knowledge(give(isl_union_map_from_map(Lifetime.take())), + ImplicitLifetimeIsUnknown, + give(isl_union_map_from_map(Written.take()))) {} + + /// Return whether this object was default-constructed. + bool isUsable() const { return Lifetime && Written; } + + /// Print the content of this object to @p OS. + void print(llvm::raw_ostream &OS, unsigned indent = 0) const { + if (isImplicitLifetimeUnknown()) + OS.indent(indent) << "Lifetime: " << Lifetime << " + Unknown\n"; + else + OS.indent(indent) << "Lifetime: " << Lifetime << " + Undef\n"; + OS.indent(indent) << "Written : " << Written << '\n'; + } + + /// Dump the object content stderr. Meant to be called in a debugger. + void dump() const; + + /// Combine two knowledges, this and @p That. + /// + /// The two knowledges must not conflict each other. Only combining 'implicit + /// unknown' (use case 1) with 'implicit undef' (use case 2) knowledges is + /// implemented. + void merge_inplace(Knowledge That) { + assert(!isConflicting(*this, That)); + assert(this->isImplicitLifetimeUnknown()); + assert(That.isImplicitLifetimeUndef()); + + auto ThatKnowDomain = filterKnownValInst(That.Lifetime); + auto ThatDomain = give(isl_union_map_domain(That.Lifetime.take())); + + Lifetime = + give(isl_union_map_subtract_domain(Lifetime.take(), ThatDomain.take())); + Lifetime = + give(isl_union_map_union(Lifetime.take(), ThatKnowDomain.take())); + + Written = give(isl_union_map_union(Written.take(), That.Written.take())); + + checkConsistency(); + } + + /// Determine whether two Knowledges conflict each other. + /// + /// In theory @p This and @p That are symmetric, but the implementation is + /// constrained by the implicit interpretation. + /// + /// A conflict is defined as non-preserved semantics when they are merged. For + /// instance, when for the same array and zone they assume different + /// llvm::Values. 
+ /// + /// @param Existing One of the knowledges; current implementation requires it + /// to be 'implicit unknown' (use case 1). + /// @param Proposed One of the knowledges; current implementation requires it + /// to be 'implicit undef' (use case 2). + /// @param OS Dump the conflict reason to this output stream; use nullptr to + /// not output anything. + /// @param Indent Indention for the conflict reason. + /// + /// @return True, iff the two knowledges are conflicting. + static bool isConflicting(const Knowledge &Existing, + const Knowledge &Proposed, + llvm::raw_ostream *OS = nullptr, + unsigned Indent = 0) { + assert(Existing.isImplicitLifetimeUnknown()); + assert(Proposed.isImplicitLifetimeUndef()); + + // The following domain intersections conflict: + // 1) Unknown vs Unknown + // 2a) Known vs Unknown, 2b) Unknown vs Known + // 3) Known vs Known that do not map to the same llvm::Value instance + // 4a) Written vs Unknown, 4b) Unknown vs Written + // 5a) Written Unknown vs Known, 5b) Known vs Written Unknown + // 6a) Written Known vs Known, 6b) Known vs Written Known that do not write + // the same llvm::Value instance + // 7) Written Known/Unknown vs Written Known/Unknown, where the first writes + // to the same location and at the same timepoint as the latter + // TODO: Exception to 7) if the same value is written + + // Check 1) and 2a) + auto ExistingUndef = getUndefValInstDomain(Existing.Lifetime); + auto ProposedUnknownDomain = getUnknownValInstDomain(Proposed.Lifetime); + if (!isl_union_set_is_subset(ProposedUnknownDomain.keep(), + ExistingUndef.keep())) { + if (OS) + OS->indent(Indent) << "Conflict with proposed unknown\n"; + return true; + } + + // Check 2b) + auto ProposedKnown = filterKnownValInst(Proposed.Lifetime); + auto ProposedKnownDomain = give(isl_union_map_domain(ProposedKnown.copy())); + auto ExistingKnownOrUndefDomain = + give(isl_union_map_domain(Existing.Lifetime.copy())); + if (!isl_union_set_is_subset(ProposedKnownDomain.keep(), + 
ExistingKnownOrUndefDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of existing unknown\n"; + return true; + } + + // Check 3) + auto ExistingKnown = filterKnownValInst(Existing.Lifetime); + auto ExistingKnownDomain = give(isl_union_map_domain(ExistingKnown.copy())); + auto CommonOverlapKnown = give(isl_union_set_intersect( + ExistingKnownDomain.copy(), ProposedKnownDomain.copy())); + auto ExistingOverlapKnown = give(isl_union_map_intersect_domain( + ExistingKnown.copy(), CommonOverlapKnown.copy())); + auto ProposedOverlapKnown = give(isl_union_map_intersect_domain( + ProposedKnown.copy(), CommonOverlapKnown.copy())); + if (!isl_union_map_is_equal(ExistingOverlapKnown.keep(), + ProposedOverlapKnown.keep())) { + if (OS) { + OS->indent(Indent) + << "Conflict of lifetime-to-map known with existing known\n"; + auto ExistingConflict = give(isl_union_map_subtract( + ExistingOverlapKnown.copy(), ProposedOverlapKnown.copy())); + auto ProposedConflict = give(isl_union_map_subtract( + ProposedOverlapKnown.copy(), ExistingOverlapKnown.copy())); + OS->indent(Indent + 2) << "Existing wants: " << ExistingConflict + << '\n'; + OS->indent(Indent + 2) << "Proposed wants: " << ProposedConflict + << '\n'; + } + return true; + } + + // Check 4a) + auto ExistingWritten = shiftDim(Existing.Written, isl_dim_in, -1, 1); + auto ExistingWrittenZone = + give(isl_union_map_domain(ExistingWritten.copy())); + if (!isl_union_set_is_disjoint(ExistingWrittenZone.keep(), + ProposedUnknownDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of current write to proposed unknown\n"; + return true; + } + + // Check 4b) + auto ProposedWritten = shiftDim(Proposed.Written, isl_dim_in, -1, 1); + auto ProposedWrittenZone = + give(isl_union_map_domain(ProposedWritten.copy())); + if (!isl_union_set_is_subset(ProposedWrittenZone.keep(), + ExistingKnownOrUndefDomain.keep())) { + if (OS) + dbgs().indent(Indent) + << "Conflict of proposed write to current unknown\n"; + return true; + } + + 
// Check 5a) + auto ExistingWrittenUnknownZone = getUnknownValInstDomain(ExistingWritten); + if (!isl_union_set_is_disjoint(ExistingWrittenUnknownZone.keep(), + ProposedKnownDomain.keep())) { + if (OS) + dbgs().indent(Indent) + << "Conflict of current unknown write to proposed\n"; + return true; + } + + // Check 5b) + auto ProposedWrittenUnknownZone = getUnknownValInstDomain(ProposedWritten); + if (!isl_union_set_is_disjoint(ProposedWrittenUnknownZone.keep(), + ExistingKnownDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of proposed unknown write to current\n"; + return true; + } + + // Check 6a) + auto ExistingWrittenOverlap = give(isl_union_map_intersect_domain( + ExistingWritten.copy(), ProposedKnownDomain.copy())); + if (!isl_union_map_is_subset(ExistingWrittenOverlap.keep(), + ProposedKnown.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of current write to proposed known\n"; + return true; + } + + // Check 6b) + auto ProposedWrittenOverlap = give(isl_union_map_intersect_domain( + ProposedWritten.copy(), ExistingKnownDomain.copy())); + if (!isl_union_map_is_subset(ProposedWrittenOverlap.keep(), + ExistingKnown.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of proposed write to current known\n"; + return true; + } + + // Check 7) + auto ExistingWrittenDomain = + give(isl_union_map_domain(Existing.Written.copy())); + auto ProposedWrittenDomain = + give(isl_union_map_domain(Proposed.Written.copy())); + if (!isl_union_set_is_disjoint(ExistingWrittenDomain.keep(), + ProposedWrittenDomain.keep())) { + if (OS) { + OS->indent(Indent) + << "Conflict because of existing/proposed undefined write order\n"; + auto Overlap = give(isl_union_set_intersect( + ExistingWrittenDomain.take(), ProposedWrittenDomain.take())); + auto ExistingOverwrite = give(isl_union_map_intersect_domain( + Existing.Written.copy(), Overlap.copy())); + auto ProposedOverwrite = give(isl_union_map_intersect_domain( + Proposed.Written.copy(), Overlap.take())); + 
OS->indent(Indent + 2) + << "Existing wants to write: " << ExistingOverwrite << '\n'; + OS->indent(Indent + 2) + << "Proposed wants to write: " << ProposedOverwrite << '\n'; + } + return true; + } + + return false; + } +}; +} // anonymous namespace + +void Knowledge::dump() const { print(llvm::errs()); } + +IslPtr +polly::computeReachingDefinition(IslPtr Schedule, + IslPtr Writes, bool InclDef, + bool InclRedef) { + // { Scatter[] } + auto ScatterSpace = getScatterSpace(Schedule); + + // { Element[] -> ScatterWrite[] } + auto DefSched = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.take())); + + // { ScatterRead[] -> ScatterWrite[] } + auto Before = give(InclRedef ? isl_map_lex_gt(ScatterSpace.take()) + : isl_map_lex_ge(ScatterSpace.take())); + + // { ScatterWrite[] -> [ScatterRead[] -> ScatterWrite[]] } + auto BeforeMap = give(isl_map_reverse(isl_map_range_map(Before.take()))); + + // { Element[] -> [ScatterUse[] -> ScatterWrite[]] } + auto DefSchedBefore = + give(isl_union_map_apply_domain(isl_union_map_from_map(BeforeMap.take()), + isl_union_map_reverse(DefSched.copy()))); + + // For each element, at every point in time, map to the times of previous + // definitions. + // { [Element[] -> ScatterRead[]] -> ScatterWrite[] } + auto ReachableDefs = give(isl_union_map_uncurry(DefSchedBefore.take())); + auto LastReachableDef = give(isl_union_map_lexmax(ReachableDefs.copy())); + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(DefSched.take())); + + if (InclDef && InclRedef) { + // Add the Def itself to the solution. + LastReachableDef = + give(isl_union_map_union(LastReachableDef.take(), SelfUse.take())); + LastReachableDef = give(isl_union_map_coalesce(LastReachableDef.take())); + } else if (!InclDef && !InclRedef) { + // Remove Def itself from the solution. 
+ LastReachableDef = + give(isl_union_map_subtract(LastReachableDef.take(), SelfUse.take())); + } + + // { [Element[] -> ScatterRead[]] -> Domain[] } + return give(isl_union_map_apply_range( + LastReachableDef.take(), isl_union_map_reverse(Schedule.take()))); +} + +IslPtr +polly::computeArrayLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, bool ReadEltInSameInst, + bool InclWrite, bool InclLastRead, bool ExitReads) { + IslPtr ExitRays; + if (ExitReads) { + // Add all writes that are never overwritten as rays. + + // { Element[] } + auto WriteElements = give(isl_union_map_range(Writes.copy())); + + // { DomainWrite[] -> Scatter[] } + auto WriteSched = give(isl_union_map_intersect_domain( + Schedule.copy(), isl_union_map_domain(Writes.copy()))); + + // { Element[] -> Scatter[] } + auto WriteActions = give(isl_union_map_apply_range( + isl_union_map_reverse(Writes.copy()), Schedule.copy())); + auto LastWrites = give(isl_union_map_lexmax(WriteActions.take())); + + // { [Element[] -> Scatter[]] -> Zone[] } + auto AfterLastWrite = afterScatter( + give(isl_union_map_range_map(LastWrites.take())), !InclWrite); + + // { [Element[] -> DomainWrite[]] -> Zone[] } + ExitRays = give(isl_union_map_apply_domain( + AfterLastWrite.take(), + isl_union_map_product(makeIdentityMap(WriteElements, false).take(), + isl_union_map_reverse(WriteSched.take())))); + } + + // { [Element[] -> DomainWrite[] -> Scatter[] } + auto Defs = give(isl_union_map_apply_range( + isl_union_map_range_map(isl_union_map_reverse(Writes.copy())), + Schedule.copy())); + + // { [Element[] -> Zone[]] -> DomainWrite[] } + auto ReachDef = computeReachingDefinition(Schedule, Writes, ReadEltInSameInst, + !ReadEltInSameInst); + + // { Element[] -> Scatter[] } + auto ReadActions = + give(isl_union_map_apply_domain(Schedule.take(), Reads.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto WhatIsItReading = give(isl_union_map_intersect_domain( + ReachDef.take(), 
isl_union_map_wrap(ReadActions.take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto Uses = give(isl_union_map_reverse( + isl_union_map_curry(reverseDomain(WhatIsItReading).take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto Result = betweenScatter(Defs, Uses, InclWrite, InclLastRead); + + if (ExitRays) + Result = give(isl_union_map_union(Result.take(), ExitRays.take())); + + return Result; +} + +IslPtr +polly::computeReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, bool IncludeOverwrite) { + assert(isl_union_map_is_bijective(Schedule.keep())); + + // { Scatter[] } + auto ScatterSpace = getScatterSpace(Schedule); + + // { Element[] -> ScatterWrite[] } + auto WriteAction = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.take())); + + // { ScatterWrite[] -> Element[] } + auto WriteActionRev = give(isl_union_map_reverse(WriteAction.copy())); + + // { ScatterRead[] -> ScatterWrite[] } + auto After = give(InclPrevWrite ? isl_map_lex_lt(ScatterSpace.take()) + : isl_map_lex_le(ScatterSpace.take())); + + // { ScatterWrite[] -> [ScatterRead[] -> ScatterWrite[]] } + auto BeforeMap = give(isl_map_reverse(isl_map_range_map(After.take()))); + + // { Element[] -> [ScatterRead[] -> ScatterWrite[]] } + auto DefSchedBefore = give(isl_union_map_apply_domain( + isl_union_map_from_map(BeforeMap.take()), WriteActionRev.take())); + + // For each element, at every point in time, map to the times of previous + // definitions. 
+ // { [Element[] -> ScatterRead[]] -> ScatterWrite[] } + auto ReachableDefs = give(isl_union_map_uncurry(DefSchedBefore.take())); + auto LastReachableDef = give(isl_union_map_lexmin(ReachableDefs.take())); + + if (InclPrevWrite && IncludeOverwrite) { + // Add the def itself to the solution + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(WriteAction.take())); + LastReachableDef = + give(isl_union_map_union(LastReachableDef.take(), SelfUse.take())); + LastReachableDef = give(isl_union_map_coalesce(LastReachableDef.take())); + } else if (!InclPrevWrite && !IncludeOverwrite) { + // Remove def itself from the solution + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(WriteAction.take())); + LastReachableDef = + give(isl_union_map_subtract(LastReachableDef.take(), SelfUse.take())); + } + + // { [Element[] -> ScatterRead[]] -> Domain[] } + auto LastReachableDefDomain = give(isl_union_map_apply_range( + LastReachableDef.take(), isl_union_map_reverse(Schedule.take()))); + + return LastReachableDefDomain; +} + +IslPtr polly::computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool IncludeLastRead, + bool IncludeWrite) { + + // { Element[] -> Scatter[] } + auto ReadActions = + give(isl_union_map_apply_domain(Schedule.copy(), Reads.take())); + auto WriteActions = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.copy())); + + // { [Element[] -> Scatter[] } + auto AfterReads = afterScatter(ReadActions, ReadEltInSameInst); + auto WritesBeforeAnyReads = + give(isl_union_map_subtract(WriteActions.take(), AfterReads.take())); + auto BeforeWritesBeforeAnyReads = + beforeScatter(WritesBeforeAnyReads, !IncludeWrite); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto EltDomWrites = give(isl_union_map_apply_range( + isl_union_map_range_map(isl_union_map_reverse(Writes.copy())), + Schedule.copy())); + + // { 
[Element[] -> Scatter[]] -> DomainWrite[] } + auto ReachingOverwrite = computeReachingOverwrite( + Schedule, Writes, ReadEltInSameInst, !ReadEltInSameInst); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReadsOverwritten = give(isl_union_map_intersect_domain( + ReachingOverwrite.take(), isl_union_map_wrap(ReadActions.take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto ReadsOverwrittenRotated = give(isl_union_map_reverse( + isl_union_map_curry(reverseDomain(ReadsOverwritten).take()))); + auto LastOverwrittenRead = + give(isl_union_map_lexmax(ReadsOverwrittenRotated.take())); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto BetweenLastReadOverwrite = betweenScatter( + LastOverwrittenRead, EltDomWrites, IncludeLastRead, IncludeWrite); + + return give(isl_union_map_union( + BeforeWritesBeforeAnyReads.take(), + isl_union_map_domain_factor_domain(BetweenLastReadOverwrite.take()))); +} + +bool polly::isConflicting(IslPtr ExistingLifetime, + bool ExistingImplicitLifetimeIsUnknown, + IslPtr ExistingWritten, + IslPtr ProposedLifetime, + bool ProposedImplicitLifetimeIsUnknown, + IslPtr ProposedWritten) { + Knowledge Existing(std::move(ExistingLifetime), + ExistingImplicitLifetimeIsUnknown, + std::move(ExistingWritten)); + Knowledge Proposed(std::move(ProposedLifetime), + ProposedImplicitLifetimeIsUnknown, + std::move(ProposedWritten)); + + return Knowledge::isConflicting(Existing, Proposed); +} + +namespace { + +/// Base class for algorithms based on zones, like DeLICM. +class ZoneAlgorithm { +protected: + Scop *S; + isl_ctx *IslCtx; + + IslPtr Schedule; + IslPtr ParamSpace; + IslPtr ScatterSpace; + + IslPtr EmptyUnionMap; + IslPtr EmptyUnionSet; + + /// Prepare the object before computing the zones of @p S. 
+ ZoneAlgorithm(Scop *S) + : S(S), IslCtx(S->getIslCtx()), Schedule(give(S->getSchedule())), + ParamSpace(give(isl_union_map_get_space(Schedule.keep()))) { + + auto Domains = give(S->getDomains()); + ParamSpace = give(isl_space_align_params( + ParamSpace.take(), isl_union_set_get_space(Domains.keep()))); + Schedule = + give(isl_union_map_intersect_domain(Schedule.take(), Domains.take())); + ScatterSpace = getScatterSpace(Schedule); + + EmptyUnionMap = give(isl_union_map_empty(ParamSpace.copy())); + EmptyUnionSet = give(isl_union_set_empty(ParamSpace.copy())); + } + + /// Create an isl_id that means 'don't know the value'. + IslPtr makeUnknownId() const { return nullptr; } + + /// Create an isl_space for unknown values. + IslPtr makeUnknownSpace() const { + return give(isl_space_set_from_params(ParamSpace.copy())); + } + + /// Create a set with an unknown value in it. + IslPtr makeUnknownSet() const { + auto Space = makeUnknownSpace(); + return give(isl_set_universe(Space.take())); + } + + /// Create a union set with an unknown value in it. + IslPtr makeUnknownUSet() const { + return give(isl_union_set_from_set(makeUnknownSet().take())); + } + + /// Create an isl_id that represent 'unused storage'. + IslPtr makeUndefId() const { + auto &LLVMContext = S->getFunction().getContext(); + auto Ty = IntegerType::get(LLVMContext, 1); + auto Val = UndefValue::get(Ty); + return give(isl_id_alloc(IslCtx, "Undef", Val)); + } + + /// Create an isl_space for an undefined value. + IslPtr makeUndefSpace() const { + auto Result = give(isl_space_set_from_params(ParamSpace.copy())); + return give(isl_space_set_tuple_id(Result.take(), isl_dim_set, + makeUndefId().take())); + } + + /// Create a set with an undefined value in it. + IslPtr makeUndefSet() const { + auto Space = makeUndefSpace(); + return give(isl_set_universe(Space.take())); + } + + /// Create a union set with an undefined value in it. 
+ IslPtr makeUndefUSet() const { + return give(isl_union_set_from_set(makeUndefSet().take())); + } + + IslPtr makeValueId(Value *V) const { + if (!V) + return makeUnknownId(); + if (isa(V)) + return makeUndefId(); + auto Name = getIslCompatibleName("Val_", V, std::string()); + return give(isl_id_alloc(IslCtx, Name.c_str(), V)); + } + IslPtr makeValueSpace(Value *V) const { + auto Result = give(isl_space_set_from_params(ParamSpace.copy())); + return give(isl_space_set_tuple_id(Result.take(), isl_dim_set, + makeValueId(V).take())); + } + + IslPtr makeValueSet(Value *V) const { + auto Space = makeValueSpace(V); + return give(isl_set_universe(Space.take())); + } + + // { UserDomain[] -> ValInst[] } + IslPtr makeValInst(Value *Val, IslPtr DefDomain, + ScopStmt *UserStmt, bool IsCertain) { + return makeValInst(Val, std::move(DefDomain), UserStmt, + getDomainFor(UserStmt), IsCertain); + } + + // { UserDomain[] -> ValInst[] } + IslPtr makeValInst(Value *Val, ScopStmt *UserStmt, + bool IsCertain = true) { + return makeValInst(Val, nullptr, UserStmt, getDomainFor(UserStmt), + IsCertain); + } + + // { UserDomain[] -> ValInst[] } + IslPtr makeValInst(Value *V, IslPtr DefDomain, + ScopStmt *UserStmt, IslPtr UserDomain, + bool IsCertain) { + assert(UserDomain); + + if (!V && !UserStmt) + return give(isl_map_from_domain(UserDomain.take())); + + if (!IsCertain) + return give(isl_map_from_domain(UserDomain.take())); + + if (!isa(V)) { + // Available anywhere + auto ValSet = makeValueSet(V); + return give( + isl_map_from_domain_and_range(UserDomain.take(), ValSet.take())); + } + + // FIXME: Should use transitivity in case of LCSSA (ReachingDefinition for + // each %phi) + // It doesn't really work well if the LCSSA %phi is intra-stmt, but the + // incoming value is extra-phi. + // TODO: Actually, in the SCop, we should be able to determine the + // predecessor for _every_ PHI. 
+ auto NormV = deLCSSA(V); + + if (!DefDomain && !isXtraStmtUse(V, UserStmt)) { + // assert(V==NormV && "Cannot yet handle the case where NormV is + // extra-stmt"); + // Easy case: the definition is in the using Stmt itself, so use UserDom[] + // for the Value's instance. + // Not that the non-isIntraStmtUse assumes extra-Stmt use, ie. a use would + // use the definition from a previous instance. + + // { llvm::Value } + auto ValSet = makeValueSet(NormV); + + // { UserDomain[] -> llvm::Value } + auto ValInstSet = + give(isl_map_from_domain_and_range(UserDomain.take(), ValSet.take())); + + // { UserDomain[] -> [UserDomain[] - >llvm::Value] } + auto Result = + give(isl_map_reverse(isl_map_domain_map(ValInstSet.take()))); + simplify(Result); + return Result; + } + + if (!DefDomain) { + auto Inst = dyn_cast(NormV); + auto ValStmt = S->getStmtFor(Inst); + + if (!ValStmt) { + // Cannot associate with an instance; return as if unknown + // TODO: Maybe look for successor/predecessor and link to its instance. + // { Domain[] -> [] } + return give(isl_map_from_domain(UserDomain.take())); + } + + // { DefDomain[] -> Scatter[] } + DefDomain = getDomainFor(ValStmt); + } + + // { Scatter[] -> DefDomain[] } + auto ReachDef = + computeScalarReachingDefinition(Schedule, DefDomain, false, true); + + // { UserDomain[] -> Scatter[] } + auto UserSched = getScatterFor(UserDomain); + + // { UserDomain[] -> DefDomain[] } + auto UsedInstance = + give(isl_map_apply_range(UserSched.take(), ReachDef.take())); + + // { llvm::Value } + auto ValSet = makeValueSet(NormV); + + // { UserDomain[] -> llvm::Value } + auto ValInstSet = + give(isl_map_from_domain_and_range(UserDomain.take(), ValSet.take())); + + // { UserDomain[] -> [DefDomain[] -> llvm::Value] } + auto Result = + give(isl_map_range_product(UsedInstance.take(), ValInstSet.take())); + simplify(Result); + return Result; + } + +private: + /// Of all the llvm::Values that represent the same content, try to find an + /// unique one. 
+ /// + /// PHI nodes with just one incoming block are introduced by LCSSA. All other + /// exact copy instructions (eg. bitwise 'or' with zero) should be removed by + /// InstCombine. + /// + /// Without this normalization, the two values would be considered different, + /// leading to less optimization opportunities. + Value *deLCSSA(Value *Val) { + if (!Val) + return Val; + + if (auto *PHI = dyn_cast(Val)) { + Value *NormVal = nullptr; + for (auto &Use : PHI->incoming_values()) { + auto InVal = Use.get(); + assert(InVal); + + if (isa(InVal)) + continue; + + if (NormVal && NormVal != InVal) + return Val; + + NormVal = Val; + } + if (NormVal) + return NormVal; + } + + return Val; + } + +#if 0 + /// Determine whether an instruction is defined in the same statement instance as in which it is used. + /// + /// @param Val The instruction defining a value. + /// @param UserStmt The statement using @p Val. The use must not be a PHI, they must handled separately. + /// + /// @return True iff @p Val is defined in @p UserStmt. + bool isIntraStmtUse(Value *Val, ScopStmt *UserStmt) const { + assert(UserStmt); + auto *Inst = dyn_cast(Val); + + // Non-instructions (eg. literals) are not adding to complexity + if (!Inst) { + assert(false); + return false; + } + + auto *DefStmt = S->getStmtFor(Inst); + + // This assumes that there must be a PHI in the same statement if we are + // going to use a value from a previous execution of the same statement. + return DefStmt == UserStmt; + } +#endif + + /// Determine whether an instruction is defined in a different statement + /// instance as in which it is used. + /// + /// @param Val The instruction defining a value. + /// @param UserStmt The statement using @p Val. The use must not be a PHI, + /// they must handled separately. + /// + /// @return True iff a use of @p Val in @p UserStmt introduces a + /// flow-dependency. 
+ bool isXtraStmtUse(Value *Val, ScopStmt *UserStmt) const { + assert(UserStmt); + auto *Inst = dyn_cast(Val); + + // Non-instruction like literals do not add inter-stmt dependencies. + if (!Inst) + return false; + + auto *DefStmt = S->getStmtFor(Inst); + + // Read-only uses do not add inter-stmt dependencies. + if (!DefStmt) + return false; + + // This assumes that there must be a PHI in the same statement if we are + // going to use a value from a previous execution of the same statement. + return DefStmt != UserStmt; + } + + /// Check whether @p Stmt can be accurately analyzed by zones. + /// + /// What violates our assumptions: + /// - A load after a write of the same location; we assume that all reads + /// occur before the writes. + /// - Two writes to the same location; we cannot model the order in which + /// these occur. + /// + /// Scalar reads implicitly always occur before other access therefore never + /// violate the first condition. There is also at most one write to a scalar, + /// satisfying the second condition. + bool isAcceptableStmt(ScopStmt *Stmt) { + auto Stores = EmptyUnionMap; + auto Loads = EmptyUnionMap; + + // This assumes that the MK_Array MemoryAccesses are iterated in order. + for (auto *MA : *Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + auto AccRel = give(isl_union_map_from_map(MA->getAccessRelation())); + + if (MA->isRead()) { + // Reject store after load to same location. + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Loads = give(isl_union_map_union(Loads.take(), AccRel.take())); + } + + if (MA->isWrite()) { + // In region statements the order is less clear, eg. the load and store + // might be in a boxed loop. + if (Stmt->isRegionStmt() && + !isl_union_map_is_disjoint(Loads.keep(), AccRel.keep())) + return false; + + // Do not allow more than one store to the same location. 
+ if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Stores = give(isl_union_map_union(Stores.take(), AccRel.take())); + } + } + + return true; + } + +protected: + // { [Element[] -> Zone[]] -> DomainWrite[] } + IslPtr WriteReachDefZone; + + // { DomainMayWrite[] -> Element[] } + IslPtr AllMayWrites; + + // { DomainMustWrite[] -> Element[] } + IslPtr AllMustWrites; + + // { Element[] -> Zone[] } + IslPtr UniverseZone; + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + IslPtr AllWriteValInst; + + // { [Element[] -> DomainRead[]] -> ValInst[] } + IslPtr AllReadValInst; + + // { DomainRead[] -> Element[] } + IslPtr AllReads; + + // { DomainWrite[] -> Element[] } + IslPtr AllWrites; + + // { Element[] -> Element[] } + IslPtr AllElementsId; + + bool computeCommon() { + // Check that nothing strange occurs. + for (auto &Stmt : *S) { + if (!isAcceptableStmt(&Stmt)) + return false; + } + + // { DomainRead[] -> Element[] } + AllReads = EmptyUnionMap; + + // { DomainMayWrite[] -> Element[] } + AllMayWrites = EmptyUnionMap; + + // { DomainMustWrite[] -> Element[] } + AllMustWrites = EmptyUnionMap; + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + AllWriteValInst = EmptyUnionMap; + + // { [Element[] -> DomainRead[]] -> ValInst[] } + AllReadValInst = EmptyUnionMap; + + // TODO: Identify and ensure non-usability of possible dependency within the + // statement. 
+ for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + if (MA->isRead()) { + // { DomainRead[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + AllReads = + give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); + + auto LI = dyn_cast_or_null(MA->getAccessInstruction()); + if (LI) { // TODO: Maybe use MA->getAccessValue() + // { DomainRead[] -> ValInst[] } + auto LoadValInst = makeValInst( + LI, &Stmt, + Stmt.isBlockStmt() /* In Region Statements could be a 'MayRead' */); + + // { DomainRead[] -> [Element[] -> DomainRead[]] } + auto IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.take()))); + + // { [Element[] -> DomainRead[]] -> ValInst[] } + auto EltLoadValInst = give(isl_map_apply_domain( + LoadValInst.take(), IncludeElement.take())); + + AllReadValInst = give(isl_union_map_add_map(AllReadValInst.take(), + EltLoadValInst.take())); + } + + continue; + } + + auto SI = dyn_cast(MA->getAccessInstruction()); + if (!SI) { + DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n"); + return false; + } + + // { Domain[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + + if (MA->isMustWrite()) { + AllMustWrites = + give(isl_union_map_add_map(AllMustWrites.take(), AccRel.copy())); + } + if (MA->isMayWrite()) { + AllMayWrites = + give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy())); + } + + // { Domain[] -> ValInst[] } + auto WriteValInstance = + makeValInst(MA->getAccessValue(), &Stmt, MA->isMustWrite()); + + // { Domain[] -> [Element[] -> Domain[]] } + auto IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.copy()))); + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + auto EltWriteValInst = give(isl_map_apply_domain( + WriteValInstance.take(), IncludeElement.take())); + + AllWriteValInst = give(isl_union_map_add_map(AllWriteValInst.take(), + EltWriteValInst.take())); + } + } + + // { DomainRead[] -> ValInst[] } + AllReadValInst = 
filterKnownValInst(AllReadValInst); + + // { DomainWrite[] -> Element[] } + AllWrites = + give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); + + // { Element[] } + auto AllElements = give(isl_union_set_empty(ParamSpace.copy())); + foreachElt(AllWrites, [=, &AllElements](IslPtr Write) { + auto Space = give(isl_map_get_space(Write.keep())); + auto EltSpace = give(isl_space_range(Space.take())); + auto EltUniv = give(isl_set_universe(EltSpace.take())); + AllElements = + give(isl_union_set_add_set(AllElements.take(), EltUniv.take())); + }); + + // { Element[] -> Element[] } + AllElementsId = makeIdentityMap(AllElements, false); + + // { Element[] -> Zone[] } + UniverseZone = give(isl_union_map_from_domain_and_range( + AllElements.copy(), + isl_union_set_from_set(isl_set_universe(ScatterSpace.copy())))); + + // { [Element[] -> Zone[]] -> DomainWrite[] } + WriteReachDefZone = + computeReachingDefinition(Schedule, AllWrites, false, true); + + simplify(WriteReachDefZone); + return true; + } + +protected: + // { [Element[] -> Domain[]] -> Scatter[] } + IslPtr getEltDomScatter(MemoryAccess *MA) const { + // { Domain[] -> Element[] } + auto Acc = getAccessRelationFor(MA); + + // { Domain[] -> Scatter[] } + auto Scatter = getScatterFor(MA); + + // { [Element[] -> Domain[]] -> Domain[] } + auto EltDomDom = give(isl_map_range_map(isl_map_reverse(Acc.take()))); + + // { [Element[] -> Domain[]] -> Scatter[] } + return give(isl_map_apply_range(EltDomDom.take(), Scatter.take())); + } + + IslPtr getScatterFor(ScopStmt *Stmt) const { + auto Dom = getDomainFor(Stmt); + auto Space = give(isl_set_get_space(Dom.keep())); + auto Domain = give(isl_union_set_from_set(Dom.take())); + // TODO: getSchedule() is already intersected with all domains(?), no need + // to do again. 
+ auto Sched = + give(isl_union_map_intersect_domain(Schedule.copy(), Domain.take())); + return singleton(Sched, give(isl_space_map_from_domain_and_range( + Space.take(), ScatterSpace.copy()))); + } + + IslPtr getScatterFor(ScopStmt &Stmt) const { + return getScatterFor(&Stmt); + } + + // TODO: There is currently no notion of where within a statement an access + // occurs. This leads to problem such as the lifetime of a scalar: + // Def at [0,0,0] + // Use at [0,0,1] + // Because a scalar write occurs after everything in the statement, and a read + // before, neither [0,0,0] nor [0,0,1] belong to the lifetime. Therefore the + // lifetime would be empty, equivalent to not requiring storage at all. + // The current workaround is the write belongs to the lifetime. This will + // make mapping of the result of LoadInst of the location mapped-to (typical + // for reductions) impossible since at timestep [0,0,0] the array element it + // reads from still occupies the memory. + // Better solutions: + // Multiply every scatter by two (or more); adds the odd numbers between two + // scatters + // - Append another dimensions such that there's something between [0,0,0] and + // [0,0,1] (namely, eg. [0,0,0,1]) + // - Adding a dimension also allow a notation of "before" the statement + // [0,0,0,-1] and "after" it [0,0,0,1] + // - Might give each instruction its own 'order number' + // - Allow rational solutions. + // - Treat a zone coordinate [0,0,0] as "right after statement instance + // [0,0,0]"/"between instance [0,0,0] and [0,0,1]" (staggered grid); will + // require adding/subtraction 1 to convert between before and after; this + // works along the line of the current workaround. 
+ IslPtr getScatterFor(MemoryAccess *MA) const { + return getScatterFor(MA->getStatement()); + } + + IslPtr getScatterFor(IslPtr Domain) const { + auto Space = give(isl_space_map_from_domain_and_range( + isl_set_get_space(Domain.keep()), ScatterSpace.copy())); + auto Sched = give(isl_union_map_extract_map(Schedule.keep(), Space.take())); + assert(isMapDomainSubsetOf(Sched, Domain) != isl_bool_false); + return give(isl_map_intersect_domain(Sched.take(), Domain.take())); + } + + IslPtr getDomainFor(ScopStmt *Stmt) const { + return give(Stmt->getDomain()); + } + + IslPtr getDomainFor(MemoryAccess *MA) const { + return getDomainFor(MA->getStatement()); + } + + IslPtr getAccessRelationFor(MemoryAccess *MA) const { + auto Domain = give(MA->getStatement()->getDomain()); + auto AccRel = give(MA->getLatestAccessRelation()); + return give(isl_map_intersect_domain(AccRel.take(), Domain.take())); + } + IslPtr getSchedule() const { return Schedule; } + + // friend class Knowledge; + +public: + /// Return the SCoP this object is analyzing. 
+ Scop *getScop() const { return S; } +}; + +struct MapReport { + MemoryAccess *NormMA; + int NumAccesses; + IslPtr Target; + IslPtr Lifetime; + Knowledge Zone; + + MapReport(MemoryAccess *NormMA, int NumAccesses, IslPtr Target, + IslPtr Lifetime, Knowledge Zone) + : NormMA(NormMA), NumAccesses(NumAccesses), Target(std::move(Target)), + Lifetime(std::move(Lifetime)), Zone(std::move(Zone)) { + DEBUG(print(llvm::dbgs(), 0)); + } + + MapReport(const MapReport &That) = delete; + MapReport(MapReport &&That) + : NormMA(That.NormMA), NumAccesses(That.NumAccesses), Target(That.Target), + Lifetime(That.Lifetime), Zone(std::move(That.Zone)) { + That.Target = nullptr; + That.Lifetime = nullptr; + } + + void print(llvm::raw_ostream &OS, int indent = 0) const { + OS.indent(indent) << "Scalar access " << *NormMA << ":\n"; + OS.indent(indent + 4) << "Accesses: " << NumAccesses << "\n"; + OS.indent(indent + 4) << "Target: " << Target << "\n"; + OS.indent(indent + 4) << "Lifetime: " << Lifetime << "\n"; + OS.indent(indent + 4) << "Zone:\n"; + Zone.print(OS, indent + 8); + } +}; + +class DeLICMImpl : public ZoneAlgorithm { +private: + Knowledge OriginalZone; + Knowledge Zone; + + SmallVector MapReports; + + bool isConflicting(const Knowledge &Proposed) { + raw_ostream *OS = nullptr; + DEBUG(OS = &llvm::dbgs()); + return Knowledge::isConflicting(Zone, Proposed, OS, 4); + } + + MemoryAccess *getDefAccsFor(const ScopArrayInfo *SAI) { + assert(SAI); + auto *MA = ValueDefAccs.lookup(SAI); + assert(MA); + assert(MA->isOriginalValueKind()); + assert(MA->isWrite()); + return MA; + } + + MemoryAccess *getPHIAccsFor(const ScopArrayInfo *SAI) { + assert(SAI); + auto *MA = PHIReadAccs.lookup(SAI); + assert(MA); + assert(MA->isOriginalAnyPHIKind()); + assert(MA->isRead()); + return MA; + } + + bool isMappable(const ScopArrayInfo *SAI) { + assert(SAI); + + if (SAI->isValueKind()) { + auto *MA = ValueDefAccs.lookup(SAI); + if (!MA) { + DEBUG(dbgs() + << " Reject because value is read-only within 
the scop\n"); + return false; + } + + // Mapping if value is used after scop is not supported.s + auto Inst = MA->getAccessInstruction(); + for (auto User : Inst->users()) { + if (!isa(User)) + return false; // Play safe with strange user + auto UserInst = cast(User); + + if (!S->contains(UserInst)) { + DEBUG(dbgs() << " Reject because value is escaping\n"); + return false; + } + } + return true; + } + + if (SAI->isPHIKind()) { + auto *MA = getPHIAccsFor(SAI); + + // Mapping if PHI has incoming block from before the block is not + // supported. + auto PHI = cast(MA->getAccessInstruction()); + for (auto Incoming : PHI->blocks()) { + if (!S->contains(Incoming)) { + DEBUG(dbgs() << " Reject because at least one incoming block is " + "not in the scop region\n"); + return false; + } + } + + return true; + } + + DEBUG(dbgs() << " Reject ExitPHI or other non-Value\n"); + return false; + } + + // { DomainValueDef[] -> Zone[] } + // Get the zone from a scalar's definition to its last use. + // TODO: Cache results? 
+ // TODO: Values might be escaping to after the scop; need to exclude such + // scalars or extend their lifetime as ray + /// Compute the lifetime of an MK_Value (from the definition to the last use) + /// Pass the WRITE MemoryAccess that defines the llvm::Value + /// { DomainDef[] -> Zone[] } + IslPtr computeValueLifetime(const ScopArrayInfo *WA) { + assert(WA->isValueKind()); + + // { DomainRead[] } + auto Reads = give(isl_union_set_empty(ParamSpace.copy())); + + // Find all uses + // TODO: Create a map of defs and uses + + for (auto *MA : ValueUseAccs.lookup(WA)) + Reads = + give(isl_union_set_add_set(Reads.take(), getDomainFor(MA).take())); + + // { DomainDef[] } + auto Writes = getDomainFor(ValueDefAccs.lookup(WA)); + + // { DomainDef[] -> Zone[] } + auto Lifetime = computeScalarLifetime(Schedule, Writes, Reads, false, false, + true, false); + + // TODO: This does not consider uses of the scalar after the scop; we + // currently bail out if there is such a use. + return Lifetime; + } + + // { DomainPHIRead[] -> DomainPHIWrite[] } + IslPtr computePerPHI(const ScopArrayInfo *SAI) { + assert(SAI->isPHIKind()); + + // { DomainPHIWrite[] -> Scatter[] } + auto PHIWriteScatter = give(isl_union_map_empty(ParamSpace.copy())); + + for (auto *MA : PHIIncomingAccs.lookup(SAI)) { + auto Scatter = getScatterFor(MA); + PHIWriteScatter = + give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take())); + } + + // For each PHIRead instance, find the PHI instance which writes it. + // We assume that there must be exactly one! 
+ + // { DomainPHIRead[] -> Scatter[] } + auto PHIReadScatter = getScatterFor(PHIReadAccs.lookup(SAI)); + + // { DomainPHIRead[] -> Scatter[] } + auto BeforeRead = beforeScatter(PHIReadScatter, true); + + // { Scatter[] } + auto WriteTimes = singleton( + give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace); + + // { DomainPHIRead[] -> Scatter[] } + auto PHIWriteTimes = + give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take())); + auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take())); + + // { DomainPHIRead[] -> DomainPHIWrite[] } + auto PerPHIWrite = give(isl_union_map_apply_range( + isl_union_map_from_map(LastPerPHIWrites.take()), + isl_union_map_reverse(PHIWriteScatter.take()))); + + return PerPHIWrite; + } + + // TODO: No need for RefMA + /// @param RefMA The MemoryAccess that is sought to be mapped to @p TargetElt + /// @param DefMA The definition MemoryAccess MUST_WRITE for the same value + /// (unique). + bool tryMapValue(const ScopArrayInfo *SAI, MemoryAccess *RefMA, + // { Zone[] -> Element[] } + IslPtr TargetElt) { + assert(SAI->isValueKind()); + auto *DefMA = ValueDefAccs.lookup(SAI); + assert(DefMA->isValueKind()); + assert(DefMA->isMustWrite()); + assert(RefMA->isValueKind()); + // assert(isLatestAccessingSameScalar(DefMA, RefMA)); + // TODO: Check same(or at least more) byte size + // Means it already has been mapped + if (!DefMA->getLatestScopArrayInfo()->isValueKind()) + return false; + + auto V = DefMA->getAccessValue(); + + // { DomainDef[] -> Scatter[] } + auto DefSched = getScatterFor(DefMA); + + // Where each write is mapped to + // { DomainDef[] -> Element[] } + auto DomDefTargetElt = give(isl_map_apply_domain( + TargetElt.copy(), isl_map_reverse(DefSched.copy()))); + + simplify(DomDefTargetElt); + DEBUG(dbgs() << " Mapping: " << DomDefTargetElt << "\n"); + + auto OrigDomain = getDomainFor(DefMA); + auto MappedDomain = give(isl_map_domain(DomDefTargetElt.copy())); + if (!isl_set_is_subset(OrigDomain.keep(), 
MappedDomain.keep())) { + DEBUG(dbgs().indent(4) + << "Reject because mapping does not encompass all instances.\n"); + return false; + } + + // { DomainDef[] -> Zone[] } + auto Lifetime = computeValueLifetime(SAI); + if (!Lifetime) + return false; + + simplify(Lifetime); + DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); + + // { DefDomain[] -> [Element[] -> Zone[]] } + auto EltLifetimeTranslator = + give(isl_map_range_product(DomDefTargetElt.copy(), Lifetime.copy())); + + // { DefDomain[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = + give(isl_map_range_product(DomDefTargetElt.copy(), DefSched.take())); + + // { DefDomain[] -> ValInst[] } + auto ValInst = makeValInst(V, DefMA->getStatement(), true); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetime = give( + isl_map_apply_domain(ValInst.copy(), EltLifetimeTranslator.take())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWriteAction = + give(isl_map_apply_domain(ValInst.copy(), WrittenTranslator.take())); + + Knowledge Proposed(EltLifetime, false, EltWriteAction); + + if (isConflicting(Proposed)) + return false; + + // OK to map + mapValue(SAI, DomDefTargetElt, Lifetime, std::move(Proposed)); + return true; + } + + void applyLifetime(Knowledge Proposed) { + Zone.merge_inplace(std::move(Proposed)); + } + + void mapValue(const ScopArrayInfo *SAI, + // { DomainDef[] -> Element[] } + IslPtr Mapping, + // { DomainDef[] -> Zone[] } + IslPtr Lifetime, Knowledge Proposed) { + auto *WA = ValueDefAccs.lookup(SAI); + // TODO: This must already have been computed before: reuse + // { Scatter[] -> DomainDef[] } + auto ReachDef = computeScalarReachingDefinition(Schedule, getDomainFor(WA), + false, true); + assert(ReachDef && "Operations count exceeded?"); + + // { Scatter[] -> Element[] } + auto TargetZone = + give(isl_map_apply_range(ReachDef.take(), Mapping.copy())); + simplify(TargetZone); + int NumMappedAccessed = 1; + + for (auto *MA : ValueUseAccs.lookup(SAI)) { + // { Domain[] 
-> Scatter[] } + auto Sched = getScatterFor(MA); + + // { Domain[] -> Element[] } + auto NewAccRel = + give(isl_map_apply_range(Sched.take(), TargetZone.copy())); + + simplify(NewAccRel); + MA->setNewAccessRelation(NewAccRel.take()); + NumMappedAccessed += 1; + } + + WA->setNewAccessRelation(Mapping.copy()); + + applyLifetime(Proposed); + + MappedValueScalars++; + MapReports.emplace_back(WA, NumMappedAccessed, Mapping, Lifetime, + std::move(Proposed)); + } + + IslPtr + wholeStmtDomain(NonowningIslPtr RelevantDomain) { + auto Universe = give(isl_union_set_empty(ParamSpace.copy())); + foreachElt(RelevantDomain, [&Universe](IslPtr Dom) { + auto Space = give(isl_set_get_space(Dom.keep())); + auto DomUniv = give(isl_set_universe(Space.take())); + Universe = give(isl_union_set_add_set(Universe.take(), DomUniv.take())); + }); + return give(isl_union_set_intersect(Universe.take(), + isl_union_map_domain(Schedule.copy()))); + } + + IslPtr expandMapping(IslPtr Relevant) { + auto RelevantDomain = give(isl_union_map_domain(Relevant.copy())); + auto Universe = wholeStmtDomain(RelevantDomain); + return ::expandMapping(Relevant, Universe); + } + + // { PHIWriteDomain[] -> ValInst[] } + IslPtr determinePHIWrittenValues(const ScopArrayInfo *SAI) { + auto *PHIRead = this->PHIReadAccs.lookup(SAI); + auto Result = EmptyUnionMap; + // auto PHIInst = cast( PHIRead->getAccessValue()); + auto *ReadStmt = PHIRead->getStatement(); + + for (auto *MA : PHIIncomingAccs.lookup(SAI)) { + // { PHIWriteDomain[] -> ValInst[] } + IslPtr ValInst; + auto *WriteStmt = MA->getStatement(); + + auto Incoming = MA->getIncoming(); + assert(!Incoming.empty()); + if (Incoming.size() == 1) { + ValInst = makeValInst(Incoming[0].second, WriteStmt, true); + } else { + // If the PHI is in a subregion's exit node it can have multiple + // incoming values (+ maybe another incoming edge from an unrelated + // block. 
Since we cannot directly represent it as a single + // llvm::Value from multiple exiting block, it is represented using + // the PHI itself. + // We currently model it as unknown value, but modeling as the PHIInst + // itself could be OK, too. + ValInst = give(isl_map_from_domain(getDomainFor(WriteStmt).take())); + } + + Result = give(isl_union_map_add_map(Result.take(), ValInst.take())); + } + + assert(isl_union_map_is_single_valued(Result.keep())); + return Result; + } + + bool tryMapPHI(const ScopArrayInfo *SAI, + // { Zone[] -> Element[] } + IslPtr Target) { + auto PHIRead = this->PHIReadAccs.lookup(SAI); + assert(PHIRead->isPHIKind()); + assert(PHIRead->isRead()); + // TODO: Check same(or at least more) byte size + // Means it already has been mapped + if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) + return false; + + // { DomainPHIRead[] -> Scatter[] } + auto PHISched = getScatterFor(PHIRead); + + // { DomainPHIRead[] -> Element[] } + auto PHITarget = give(isl_map_apply_range(PHISched.copy(), Target.copy())); + + simplify(PHITarget); + DEBUG(dbgs() << " Mapping: " << PHITarget << "\n"); + + auto OrigDomain = getDomainFor(PHIRead); + auto MappedDomain = give(isl_map_domain(PHITarget.copy())); + // TODO: use expandMapping() as well? Are there any non-relevant PHIReads? + // Can we get a lifetime? 
+ if (!isl_set_is_subset(OrigDomain.keep(), MappedDomain.keep())) { + DEBUG(dbgs().indent(4) + << "Reject because mapping does not encompass all instances.\n"); + return false; + } + + // { DomainPHIRead[] -> DomainPHIWrite[] } + auto PerPHIWrites = computePerPHI(SAI); + assert(isl_union_map_is_single_valued(PerPHIWrites.keep())); + assert(isl_union_map_is_injective(PerPHIWrites.keep())); + + // { DomainPHIWrite[] -> Element[] } + auto RelevantWritesTarget = + give(isl_union_map_reverse(isl_union_map_apply_domain( + PerPHIWrites.copy(), isl_union_map_from_map(PHITarget.copy())))); + simplify(RelevantWritesTarget); + auto ExpandedTargetWrites = expandMapping(RelevantWritesTarget); + simplify(ExpandedTargetWrites); + + // { DomainPHIWrite[] } + auto ExpandedWritesDom = + give(isl_union_map_domain(ExpandedTargetWrites.copy())); + auto UniverseWritesDom = give(isl_union_set_empty(ParamSpace.copy())); + // wholeStmtDomain(isl_union_set_copy(ExpandedWritesDom)); + + for (auto *MA : PHIIncomingAccs.lookup(SAI)) { + UniverseWritesDom = give(isl_union_set_add_set(UniverseWritesDom.take(), + getDomainFor(MA).take())); + } + + if (!isl_union_set_is_subset(UniverseWritesDom.keep(), + ExpandedWritesDom.keep())) { + DEBUG(dbgs() << " Reject because did not find PHI write mapping for " + "all instances.\n"); + DEBUG(dbgs() << " Relevant mapping: " << RelevantWritesTarget + << "\n"); + DEBUG(dbgs() << " Extrapolated mapping: " << ExpandedTargetWrites + << "\n"); + DEBUG(dbgs() << " Missing instances: " + << give(isl_union_set_subtract(UniverseWritesDom.copy(), + ExpandedWritesDom.copy())) + << "\n"); + return false; + } + + // { DomainPHIRead[] -> Scatter[] } + auto PerPHIWriteScatter = give(isl_map_from_union_map( + isl_union_map_apply_range(PerPHIWrites.copy(), Schedule.copy()))); + + // { DomainPHIRead[] -> Zone[] } + auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); + + // { DomainPHIWrite[] -> Zone[] } + auto WriteLifetime = 
give(isl_union_map_apply_domain( + isl_union_map_from_map(Lifetime.copy()), PerPHIWrites.copy())); + + // { DomainPHIWrite[] -> ValInst[] } + auto WrittenValue = determinePHIWrittenValues(SAI); + + // { DomainPHIWrite[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = give(isl_union_map_range_product( + ExpandedTargetWrites.copy(), Schedule.copy())); + + simplify(Lifetime); + DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); + + // { Element[] -> Zone[] } + auto EltLifetime = + give(isl_map_apply_domain(Lifetime.copy(), PHITarget.copy())); + + simplify(EltLifetime); + DEBUG(dbgs() << " Occupancy: " << EltLifetime << "\n"); + + // { DomainPHIWrite[] -> [Element[], Zone[]] } + auto LifetimeTranslator = give(isl_union_map_range_product( + ExpandedTargetWrites.copy(), WriteLifetime.take())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetimeInst = give(isl_union_map_apply_domain( + WrittenValue.copy(), LifetimeTranslator.take())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWritten = give(isl_union_map_apply_domain( + WrittenValue.copy(), WrittenTranslator.take())); + + Knowledge Proposed(EltLifetimeInst, false, EltWritten); + + if (isConflicting(Proposed)) + return false; + + mapPHI(SAI, std::move(PHITarget), std::move(ExpandedTargetWrites), + std::move(Lifetime), std::move(Proposed)); + return true; + } + + void mapPHI(const ScopArrayInfo *SAI, + // { DomainPHIRead[] -> Element[] } + IslPtr ReadMapping, + // { DomainPHIWrite[] -> Element[] } + IslPtr WriteMapping, + // { DomainPHIRead[] -> Zone[] } + IslPtr Lifetime, Knowledge Proposed) { + auto *PHIRead = this->PHIReadAccs.lookup(SAI); + + // { Element[] } + auto ElementSpace = + give(isl_space_range(isl_map_get_space(ReadMapping.keep()))); + + int NumMappedAccessed = 1; + + for (auto *MA : PHIIncomingAccs.lookup(SAI)) { + auto DomSpace = give(MA->getStatement()->getDomainSpace()); + auto WriteTargetSpace = give(isl_space_map_from_domain_and_range( + DomSpace.take(), 
ElementSpace.copy())); + auto NewAccRel = give(isl_union_map_extract_map(WriteMapping.keep(), + WriteTargetSpace.take())); + + MA->setNewAccessRelation(NewAccRel.take()); + NumMappedAccessed += 1; + } + + PHIRead->setNewAccessRelation(ReadMapping.copy()); + applyLifetime(Proposed); // Merge into Zone, like mapValue() + MappedPHIScalars++; + MapReports.emplace_back(PHIRead, NumMappedAccessed, std::move(ReadMapping), + std::move(Lifetime), std::move(Proposed)); + } + + void collapseScalarsToStore(MemoryAccess *TargetStoreMA) { + assert(TargetStoreMA->isLatestArrayKind()); + assert(TargetStoreMA->isMustWrite()); + + auto TargetStmt = TargetStoreMA->getStatement(); + + // { DomTarget[] } + auto TargetDom = getDomainFor(TargetStmt); + + // { DomTarget[] -> Element[] } + auto TargetAccRel = getAccessRelationFor(TargetStoreMA); + + // { Zone[] -> DomTarget[] } + auto Target = + computeScalarReachingOverwrite(Schedule, TargetDom, false, true); + + // { Zone[] -> Element[] } + auto EltTarget = give( + isl_map_apply_range(Target.take(), TargetAccRel.take())); // TODO: gist + simplify(EltTarget); + DEBUG(dbgs() << " Target mapping is " << EltTarget << "\n"); + + // TODO: Mark LoadInst of same element and its derivatives as preferable to + // map. 
+ + SmallVector Worklist; + + auto ProcessAllIncoming = [&](ScopStmt *Stmt) { + for (auto *MA : *Stmt) { + if (!MA->isLatestScalarKind()) + continue; + if (!MA->isRead()) + continue; + + Worklist.push_back(MA); + } + }; + + auto WrittenVal = TargetStoreMA->getAccessValue(); + if (auto InputAcc = getInputAccessOf(WrittenVal, TargetStmt)) + Worklist.push_back(InputAcc); + else + ProcessAllIncoming(TargetStmt); + + auto AnyMapped = false; + auto &DL = + S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + auto StoreSize = + DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); + + while (!Worklist.empty()) { + auto *MA = Worklist.pop_back_val(); + + if (MA->isLatestArrayKind()) + continue; + + auto *SAI = MA->getScopArrayInfo(); + assert(SAI); + DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI + << ")\n"); + + auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); + if (MASize > StoreSize) { + DEBUG(dbgs() << " Reject because storage size is insufficient.\n"); + continue; + } + + if (!isMappable(SAI)) + continue; + + if (SAI->isValueKind() && tryMapValue(SAI, MA, EltTarget)) { + ProcessAllIncoming(MA->getStatement()); + AnyMapped = true; + continue; + } + + if (SAI->isPHIKind() && tryMapPHI(SAI, EltTarget)) { + + for (auto *PHIWrite : PHIIncomingAccs.lookup(SAI)) + ProcessAllIncoming(PHIWrite->getStatement()); + + AnyMapped = true; + continue; + } + } + + if (AnyMapped) + TargetsMapped++; + } + + void printBefore(llvm::raw_ostream &OS, int indent = 0) { + OS.indent(indent) << "Original zone:\n"; + OriginalZone.print(OS, indent + 4); + } + + void printMappedScalars(llvm::raw_ostream &OS, int indent = 0) { + OS.indent(indent) << "Mapped scalars {\n"; + for (auto &Report : MapReports) { + Report.print(OS, indent + 4); + } + OS.indent(indent) << "}\n"; + } + + void printAfter(llvm::raw_ostream &OS, int indent = 0) { + OS.indent(indent) << "After zone:\n"; + Zone.print(OS, indent + 4); + } + + void 
printAccesses(llvm::raw_ostream &OS, int indent = 0) { + OS.indent(indent) << "After Statements {\n"; + for (auto &Stmt : *S) { + OS.indent(indent + 4) << Stmt.getBaseName() << "\n"; + for (auto *MA : Stmt) + MA->print(OS); // No indent parameters + } + OS.indent(indent) << "}\n"; + } + +#if 0 + DenseMap ValueDefs; + DenseMap PHIReads; +#endif + + DenseMap ValueDefAccs; + DenseMap> ValueUseAccs; + DenseMap PHIReadAccs; + DenseMap> + PHIIncomingAccs; + + // Find the MemoryAccess that defines an MK_Value/reads and MK_PHI. + // MK_ExitPHIs are never mapped. + void findScalarAccesses() { + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + + if (MA->isOriginalValueKind() && MA->isWrite()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!ValueDefAccs.count(SAI) && + "There must be at most one definition per MK_Value scalar"); + ValueDefAccs[SAI] = MA; + } + + if (MA->isOriginalValueKind() && MA->isRead()) + ValueUseAccs[MA->getScopArrayInfo()].push_back(MA); + + if (MA->isOriginalAnyPHIKind() && MA->isRead()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!PHIReadAccs.count(SAI) && + "There must be exactly one read by " + "PHI (that's where the PHINode is)"); + PHIReadAccs[SAI] = MA; + } + + if (MA->isOriginalAnyPHIKind() && MA->isWrite()) + PHIIncomingAccs[MA->getScopArrayInfo()].push_back(MA); + } + } + } + + /// Compute when an array element is alive (Its value will be read in the + /// future) and its value at that time. 
+ /// + /// @return { [Element[] -> Zone[]] -> ValInst[] } + IslPtr computeLifetime() const { + // { [Element[] -> Zone[]] } + auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, + false, false, true); + + // { [Element[] -> Zone[]] } + auto UnusedZone = give(isl_union_map_wrap(ArrayUnused.take())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto UnusedUndef = give(isl_union_map_from_domain_and_range( + UnusedZone.copy(), makeUndefUSet().take())); + + // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } + auto EltReachdDef = give(isl_union_map_range_product( + isl_union_map_domain_map(UniverseZone.copy()), + WriteReachDefZone.copy())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetime = give(isl_union_map_apply_domain( + AllWriteValInst.copy(), isl_union_map_reverse(EltReachdDef.take()))); + + // Remove the zones that are guaranteed to be overwritten - they do not + // belong to a lifetime. + EltLifetime = give( + isl_union_map_subtract_domain(EltLifetime.take(), UnusedZone.take())); + EltLifetime = + give(isl_union_map_union(EltLifetime.take(), UnusedUndef.take())); + EltLifetime = removeUnknownValInst(EltLifetime); + + // TODO: If EltLifetime at a point maps to two (non-undef) values, replace + // by unknown. + simplify(EltLifetime); + return EltLifetime; + } + + /// Compute when an array element is written to. + /// + /// @return { [Element[] -> Scatter[]] -> ValInst[] } + IslPtr computeWritten() const { + // { [Element[] -> DomainWrite[]] -> [Element[] -> Scatter[]] } + auto EltWriteTranslator = + give(isl_union_map_product(AllElementsId.copy(), Schedule.copy())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWritten = give(isl_union_map_apply_domain( + AllWriteValInst.copy(), EltWriteTranslator.take())); + + simplify(EltWritten); + return EltWritten; + } + +public: + DeLICMImpl(Scop *S) : ZoneAlgorithm(S) {} + + /// Calculate the lifetime (definition to last use) of every array element. 
+ /// + /// @return True if the computed lifetimes (#Zone) are usable. + bool computeZone() { + IslMaxOperationsGuard MaxOpGuard(IslCtx, DelicmMaxOps); + + if (!computeCommon()) + return false; + + findScalarAccesses(); + + auto EltLifetime = computeLifetime(); + auto EltWritten = computeWritten(); + OriginalZone = + Knowledge(std::move(EltLifetime), true, std::move(EltWritten)); + DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); + + Zone = OriginalZone; + return DelicmMaxOps == 0 || Zone.isUsable(); + } + + /// Try to map as many scalars to unused array elements as possible. + /// + /// Multiple scalars might be mappable to intersecting unused array element + /// zones, but we can only choose one. This is a greedy algorithm, therefore + /// the first processed will claim it. + void greedyCollapse() { + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + if (!MA->isWrite()) + continue; + + if (MA->isMayWrite()) { + DEBUG(dbgs() << "Access " << *MA + << " pruned because it is a MAY_WRITE\n"); + continue; + } + + if (Stmt.getNumIterators() == 0) { + DEBUG(dbgs() << "Access " << *MA + << " pruned because it is not in a loop\n"); + continue; + } + + if (isScalarAccess(getAccessRelationFor(MA))) { + DEBUG(dbgs() << "Access " << *MA + << " pruned because it writes only a single element\n"); + continue; + } + + DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); + collapseScalarsToStore(MA); + } + } + } + + void print(llvm::raw_ostream &OS, int indent = 0) { + printBefore(OS, indent); + printMappedScalars(OS, indent); + printAfter(OS, indent); + printAccesses(OS, indent); + } +}; + +class DeLICM : public ScopPass { +private: + DeLICM(const DeLICM &) = delete; + const DeLICM &operator=(const DeLICM &) = delete; + + /// Hold a reference to the isl_ctx to avoid it being freed before we released + /// all of the ISL objects. 
+ std::shared_ptr IslCtx; + + /// The pass implementation, also holding per-scop data. + std::unique_ptr ZoneComputer; + + void collapseToUnused(Scop &S) { + ZoneComputer = make_unique(&S); + + if (!ZoneComputer->computeZone()) { + DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); + return; + } + + DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); + ZoneComputer->greedyCollapse(); + + DEBUG(dbgs() << "Simplifying...\n"); + S.simplifySCoP(true); + + DEBUG(dbgs() << "\nFinal Scop:\n"); + DEBUG(S.print(dbgs())); + } + +public: + static char ID; + explicit DeLICM() : ScopPass(ID) {} + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredTransitive(); + AU.setPreservesAll(); + } + virtual bool runOnScop(Scop &S) override { + // Free resources for previous scop's computation, if not yet done. + releaseMemory(); + + IslCtx = S.getSharedIslCtx(); + collapseToUnused(S); + + return false; + } + + virtual void printScop(raw_ostream &OS, Scop &S) const override { + if (!ZoneComputer) + return; + + assert(ZoneComputer->getScop() == &S); + ZoneComputer->print(OS); + } + + virtual void releaseMemory() override { + ZoneComputer.reset(); + + // It is important to release the isl_ctx last, to ensure it is not free'd + // before any other ISL object held. + IslCtx.reset(); + } +}; + +char DeLICM::ID; +} // anonymous namespace + +Pass *polly::createDeLICMPass() { return new DeLICM(); } + +INITIALIZE_PASS_BEGIN(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, + false) +INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, + false) Index: test/CMakeLists.txt =================================================================== --- test/CMakeLists.txt +++ test/CMakeLists.txt @@ -165,19 +165,19 @@ ) set_target_properties(check-polly-isl PROPERTIES FOLDER "Polly") - # Run polly-check-format as part of polly-check only if we are compiling with - # clang, so clang-format is available. 
- # if (TARGET clang-format) would be preferable, but this target is only added - # after Polly, i.e. there is no such target yet at this point. The CMake cache - # entry LLVM_TOOL_CLANG_BUILD will only be defined after clang has been - # configured at least once, i.e. format will be checked only after a rerun of - # CMake's configure. - if (LLVM_TOOL_CLANG_BUILD) - add_dependencies(check-polly polly-check-format) - endif () - endif (NOT DEFINED LLVM_MAIN_SRC_DIR) +# Run polly-check-format as part of polly-check only if we are compiling with + # clang, so clang-format is available. +# if (TARGET clang-format) would be preferable, but this target is only added +# after Polly, i.e. there is no such target yet at this point. The CMake cache +# entry LLVM_TOOL_CLANG_BUILD will only be defined after clang has been +# configured at least once, i.e. format will be checked only after a rerun of +# CMake's configure. +if (LLVM_TOOL_CLANG_BUILD) + add_dependencies(check-polly polly-check-format) +endif () + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/update_check.py ${CMAKE_CURRENT_BINARY_DIR}/update_check.py) @@ -190,3 +190,38 @@ add_custom_target(polly-test) set_target_properties(polly-test PROPERTIES FOLDER "Polly") add_dependencies(polly-test check-polly) + + +set(updateable_tests_depends) +macro(updateable_test infile) + get_filename_component(filepath "${infile}" ABSOLUTE) + file(RELATIVE_PATH relpath "${CMAKE_CURRENT_SOURCE_DIR}" "${filepath}") + set(targetfile "${CMAKE_CURRENT_BINARY_DIR}/${relpath}") + get_filename_component(targetdir "${targetfile}" DIRECTORY) + file(MAKE_DIRECTORY "${targetdir}") + add_custom_command( + OUTPUT "${targetfile}.updated" + DEPENDS "${filepath}" "${LLVM_TOOLS_DIR}/update_check.py" opt + COMMAND "${PYTHON_EXECUTABLE}" "${LLVM_TOOLS_DIR}/update_check.py" --autorule "${filepath}" --output "${targetfile}.updated" + COMMENT "Updating test ${filepath} step 1" + VERBATIM + ) + add_custom_command( + OUTPUT "${targetfile}.timestamp" + DEPENDS 
"${targetfile}.updated" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${targetfile}.updated" "${filepath}" + COMMAND ${CMAKE_COMMAND} -E copy "${targetfile}.updated" "${targetfile}.timestamp" + COMMENT "Updating test ${filepath} step 2" + VERBATIM + ) + list(APPEND updateable_tests_depends "${targetfile}.timestamp") +endmacro() + +file( GLOB_RECURSE testfiles *.ll) +foreach (testfile IN LISTS testfiles) + updateable_test("${testfile}") +endforeach () + + +add_custom_target(polly-update-tests DEPENDS ${updateable_tests_depends}) +set_target_properties(polly-update-tests PROPERTIES FOLDER "Polly") Index: test/DeLICM/block.ll =================================================================== --- /dev/null +++ test/DeLICM/block.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/JM/lencod/block.c + +; ModuleID = '/tmp/bugpoint-6hqpl5e0/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-6a61b2a.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind uwtable +define void @dct_luma_sp2(i32* nocapture %coeff_cost) local_unnamed_addr #0 { +entry: + br label %for.body186 + +for.body186: ; preds = %if.end252, %entry + br label %if.end + +if.end: ; preds = %for.body186 + br label %if.then233 + +if.then233: ; preds = %if.end + %0 = load i32, i32* %coeff_cost, align 4 + br i1 false, label %if.end245, label %if.else238 + +if.else238: ; preds = %if.then233 + br label %if.end245 + +if.end245: ; preds = %if.else238, %if.then233 + %storemerge = add nsw i32 undef, %0 + store i32 %storemerge, i32* %coeff_cost, align 4 + br label %if.end252 + +if.end252: ; preds = %if.end245 + br i1 undef, label %for.end274, label %for.body186 + +for.end274: ; preds = %if.end252 + ret void +} + +attributes #0 = { norecurse nounwind uwtable 
"disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} Index: test/DeLICM/block0.ll =================================================================== --- /dev/null +++ test/DeLICM/block0.ll @@ -0,0 +1,173 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/JM/ldecod/block.c + +; ModuleID = '/tmp/bugpoint-c8z4ptvp/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-9cd7075.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470 = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, 
%struct.Slice.17.206.395.500.647.836.983.1130.1319.1508.1697.2138.2243.2348.2495.2684.2789.2978.3167.3314.3503.3608.3797.4028.4049.4070.4091.4112.4133.4154.4175.4196.4280.4301.4322.4343.4364.4427.4448.4511.4553.4595.4637.4868.5036.5351.5414.5561.5603.5771.5960.6359.6611.6653.6695.6716.6737.6758.6905.6989.7010.7031.7346.9194.9215.9236.9257.9278.9299.9320.9572.9593.9656.9698.9719.9950.10034.10139.10202.10466*, %struct.macroblock.18.207.396.501.648.837.984.1131.1320.1509.1698.2139.2244.2349.2496.2685.2790.2979.3168.3315.3504.3609.3798.4029.4050.4071.4092.4113.4134.4155.4176.4197.4281.4302.4323.4344.4365.4428.4449.4512.4554.4596.4638.4869.5037.5352.5415.5562.5604.5772.5961.6360.6612.6654.6696.6717.6738.6759.6906.6990.7011.7032.7347.9195.9216.9237.9258.9279.9300.9321.9573.9594.9657.9699.9720.9951.10035.10140.10203.10467*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.0.189.378.483.630.819.966.1113.1302.1491.1680.2121.2226.2331.2478.2667.2772.2961.3150.3297.3486.3591.3780.4011.4032.4053.4074.4095.4116.4137.4158.4179.4263.4284.4305.4326.4347.4410.4431.4494.4536.4578.4620.4851.5019.5334.5397.5544.5586.5754.5943.6342.6594.6636.6678.6699.6720.6741.6888.6972.6993.7014.7329.9177.9198.9219.9240.9261.9282.9303.9555.9576.9639.9681.9702.9933.10017.10122.10185.10468*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, 
%struct.timeb.19.208.397.502.649.838.985.1132.1321.1510.1699.2140.2245.2350.2497.2686.2791.2980.3169.3316.3505.3610.3799.4030.4051.4072.4093.4114.4135.4156.4177.4198.4282.4303.4324.4345.4366.4429.4450.4513.4555.4597.4639.4870.5038.5353.5416.5563.5605.5773.5962.6361.6613.6655.6697.6718.6739.6760.6907.6991.7012.7033.7348.9196.9217.9238.9259.9280.9301.9322.9574.9595.9658.9700.9721.9952.10036.10141.10204.10469, %struct.timeb.19.208.397.502.649.838.985.1132.1321.1510.1699.2140.2245.2350.2497.2686.2791.2980.3169.3316.3505.3610.3799.4030.4051.4072.4093.4114.4135.4156.4177.4198.4282.4303.4324.4345.4366.4429.4450.4513.4555.4597.4639.4870.5038.5353.5416.5563.5605.5773.5962.6361.6613.6655.6697.6718.6739.6760.6907.6991.7012.7033.7348.9196.9217.9238.9259.9280.9301.9322.9574.9595.9658.9700.9721.9952.10036.10141.10204.10469, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.Slice.17.206.395.500.647.836.983.1130.1319.1508.1697.2138.2243.2348.2495.2684.2789.2978.3167.3314.3503.3608.3797.4028.4049.4070.4091.4112.4133.4154.4175.4196.4280.4301.4322.4343.4364.4427.4448.4511.4553.4595.4637.4868.5036.5351.5414.5561.5603.5771.5960.6359.6611.6653.6695.6716.6737.6758.6905.6989.7010.7031.7346.9194.9215.9236.9257.9278.9299.9320.9572.9593.9656.9698.9719.9950.10034.10139.10202.10466 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.datapartition.12.201.390.495.642.831.978.1125.1314.1503.1692.2133.2238.2343.2490.2679.2784.2973.3162.3309.3498.3603.3792.4023.4044.4065.4086.4107.4128.4149.4170.4191.4275.4296.4317.4338.4359.4422.4443.4506.4548.4590.4632.4863.5031.5346.5409.5556.5598.5766.5955.6354.6606.6648.6690.6711.6732.6753.6900.6984.7005.7026.7341.9189.9210.9231.9252.9273.9294.9315.9567.9588.9651.9693.9714.9945.10029.10134.10197.10461*, 
%struct.MotionInfoContexts.14.203.392.497.644.833.980.1127.1316.1505.1694.2135.2240.2345.2492.2681.2786.2975.3164.3311.3500.3605.3794.4025.4046.4067.4088.4109.4130.4151.4172.4193.4277.4298.4319.4340.4361.4424.4445.4508.4550.4592.4634.4865.5033.5348.5411.5558.5600.5768.5957.6356.6608.6650.6692.6713.6734.6755.6902.6986.7007.7028.7343.9191.9212.9233.9254.9275.9296.9317.9569.9590.9653.9695.9716.9947.10031.10136.10199.10463*, %struct.TextureInfoContexts.15.204.393.498.645.834.981.1128.1317.1506.1695.2136.2241.2346.2493.2682.2787.2976.3165.3312.3501.3606.3795.4026.4047.4068.4089.4110.4131.4152.4173.4194.4278.4299.4320.4341.4362.4425.4446.4509.4551.4593.4635.4866.5034.5349.5412.5559.5601.5769.5958.6357.6609.6651.6693.6714.6735.6756.6903.6987.7008.7029.7344.9192.9213.9234.9255.9276.9297.9318.9570.9591.9654.9696.9717.9948.10032.10137.10200.10464*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470*, %struct.inp_par.16.205.394.499.646.835.982.1129.1318.1507.1696.2137.2242.2347.2494.2683.2788.2977.3166.3313.3502.3607.3796.4027.4048.4069.4090.4111.4132.4153.4174.4195.4279.4300.4321.4342.4363.4426.4447.4510.4552.4594.4636.4867.5035.5350.5413.5560.5602.5770.5959.6358.6610.6652.6694.6715.6736.6757.6904.6988.7009.7030.7345.9193.9214.9235.9256.9277.9298.9319.9571.9592.9655.9697.9718.9949.10033.10138.10201.10465*)*, i32, i32, i32, i32 } 
+%struct.datapartition.12.201.390.495.642.831.978.1125.1314.1503.1692.2133.2238.2343.2490.2679.2784.2973.3162.3309.3498.3603.3792.4023.4044.4065.4086.4107.4128.4149.4170.4191.4275.4296.4317.4338.4359.4422.4443.4506.4548.4590.4632.4863.5031.5346.5409.5556.5598.5766.5955.6354.6606.6648.6690.6711.6732.6753.6900.6984.7005.7026.7341.9189.9210.9231.9252.9273.9294.9315.9567.9588.9651.9693.9714.9945.10029.10134.10197.10461 = type { %struct.Bitstream.9.198.387.492.639.828.975.1122.1311.1500.1689.2130.2235.2340.2487.2676.2781.2970.3159.3306.3495.3600.3789.4020.4041.4062.4083.4104.4125.4146.4167.4188.4272.4293.4314.4335.4356.4419.4440.4503.4545.4587.4629.4860.5028.5343.5406.5553.5595.5763.5952.6351.6603.6645.6687.6708.6729.6750.6897.6981.7002.7023.7338.9186.9207.9228.9249.9270.9291.9312.9564.9585.9648.9690.9711.9942.10026.10131.10194.10458*, %struct.DecodingEnvironment.10.199.388.493.640.829.976.1123.1312.1501.1690.2131.2236.2341.2488.2677.2782.2971.3160.3307.3496.3601.3790.4021.4042.4063.4084.4105.4126.4147.4168.4189.4273.4294.4315.4336.4357.4420.4441.4504.4546.4588.4630.4861.5029.5344.5407.5554.5596.5764.5953.6352.6604.6646.6688.6709.6730.6751.6898.6982.7003.7024.7339.9187.9208.9229.9250.9271.9292.9313.9565.9586.9649.9691.9712.9943.10027.10132.10195.10459, i32 (%struct.syntaxelement.11.200.389.494.641.830.977.1124.1313.1502.1691.2132.2237.2342.2489.2678.2783.2972.3161.3308.3497.3602.3791.4022.4043.4064.4085.4106.4127.4148.4169.4190.4274.4295.4316.4337.4358.4421.4442.4505.4547.4589.4631.4862.5030.5345.5408.5555.5597.5765.5954.6353.6605.6647.6689.6710.6731.6752.6899.6983.7004.7025.7340.9188.9209.9230.9251.9272.9293.9314.9566.9587.9650.9692.9713.9944.10028.10133.10196.10460*, 
%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470*, %struct.datapartition.12.201.390.495.642.831.978.1125.1314.1503.1692.2133.2238.2343.2490.2679.2784.2973.3162.3309.3498.3603.3792.4023.4044.4065.4086.4107.4128.4149.4170.4191.4275.4296.4317.4338.4359.4422.4443.4506.4548.4590.4632.4863.5031.5346.5409.5556.5598.5766.5955.6354.6606.6648.6690.6711.6732.6753.6900.6984.7005.7026.7341.9189.9210.9231.9252.9273.9294.9315.9567.9588.9651.9693.9714.9945.10029.10134.10197.10461*)* } +%struct.Bitstream.9.198.387.492.639.828.975.1122.1311.1500.1689.2130.2235.2340.2487.2676.2781.2970.3159.3306.3495.3600.3789.4020.4041.4062.4083.4104.4125.4146.4167.4188.4272.4293.4314.4335.4356.4419.4440.4503.4545.4587.4629.4860.5028.5343.5406.5553.5595.5763.5952.6351.6603.6645.6687.6708.6729.6750.6897.6981.7002.7023.7338.9186.9207.9228.9249.9270.9291.9312.9564.9585.9648.9690.9711.9942.10026.10131.10194.10458 = type { i32, i32, i32, i32, i8*, i32 } +%struct.DecodingEnvironment.10.199.388.493.640.829.976.1123.1312.1501.1690.2131.2236.2341.2488.2677.2782.2971.3160.3307.3496.3601.3790.4021.4042.4063.4084.4105.4126.4147.4168.4189.4273.4294.4315.4336.4357.4420.4441.4504.4546.4588.4630.4861.5029.5344.5407.5554.5596.5764.5953.6352.6604.6646.6688.6709.6730.6751.6898.6982.7003.7024.7339.9187.9208.9229.9250.9271.9292.9313.9565.9586.9649.9691.9712.9943.10027.10132.10195.10459 = type { i32, i32, i32, i32, i32, i8*, i32* } 
+%struct.syntaxelement.11.200.389.494.641.830.977.1124.1313.1502.1691.2132.2237.2342.2489.2678.2783.2972.3161.3308.3497.3602.3791.4022.4043.4064.4085.4106.4127.4148.4169.4190.4274.4295.4316.4337.4358.4421.4442.4505.4547.4589.4631.4862.5030.5345.5408.5555.5597.5765.5954.6353.6605.6647.6689.6710.6731.6752.6899.6983.7004.7025.7340.9188.9209.9230.9251.9272.9293.9314.9566.9587.9650.9692.9713.9944.10028.10133.10196.10460 = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.syntaxelement.11.200.389.494.641.830.977.1124.1313.1502.1691.2132.2237.2342.2489.2678.2783.2972.3161.3308.3497.3602.3791.4022.4043.4064.4085.4106.4127.4148.4169.4190.4274.4295.4316.4337.4358.4421.4442.4505.4547.4589.4631.4862.5030.5345.5408.5555.5597.5765.5954.6353.6605.6647.6689.6710.6731.6752.6899.6983.7004.7025.7340.9188.9209.9230.9251.9272.9293.9314.9566.9587.9650.9692.9713.9944.10028.10133.10196.10460*, %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470*, %struct.DecodingEnvironment.10.199.388.493.640.829.976.1123.1312.1501.1690.2131.2236.2341.2488.2677.2782.2971.3160.3307.3496.3601.3790.4021.4042.4063.4084.4105.4126.4147.4168.4189.4273.4294.4315.4336.4357.4420.4441.4504.4546.4588.4630.4861.5029.5344.5407.5554.5596.5764.5953.6352.6604.6646.6688.6709.6730.6751.6898.6982.7003.7024.7339.9187.9208.9229.9250.9271.9292.9313.9565.9586.9649.9691.9712.9943.10027.10132.10195.10459*)* } 
+%struct.MotionInfoContexts.14.203.392.497.644.833.980.1127.1316.1505.1694.2135.2240.2345.2492.2681.2786.2975.3164.3311.3500.3605.3794.4025.4046.4067.4088.4109.4130.4151.4172.4193.4277.4298.4319.4340.4361.4424.4445.4508.4550.4592.4634.4865.5033.5348.5411.5558.5600.5768.5957.6356.6608.6650.6692.6713.6734.6755.6902.6986.7007.7028.7343.9191.9212.9233.9254.9275.9296.9317.9569.9590.9653.9695.9716.9947.10031.10136.10199.10463 = type { [4 x [11 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [2 x [9 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [2 x [10 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [2 x [6 x 
%struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [4 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462], [4 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462], [3 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462] } 
+%struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462 = type { i16, i8 } +%struct.TextureInfoContexts.15.204.393.498.645.834.981.1128.1317.1506.1695.2136.2241.2346.2493.2682.2787.2976.3165.3312.3501.3606.3795.4026.4047.4068.4089.4110.4131.4152.4173.4194.4278.4299.4320.4341.4362.4425.4446.4509.4551.4593.4635.4866.5034.5349.5412.5559.5601.5769.5958.6357.6609.6651.6693.6714.6735.6756.6903.6987.7008.7029.7344.9192.9213.9234.9255.9276.9297.9318.9570.9591.9654.9696.9717.9948.10032.10137.10200.10464 = type { [2 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462], [4 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462], [3 x [4 x 
%struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [4 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [15 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [15 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [5 x 
%struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [5 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [15 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]], [10 x [15 x %struct.BiContextType.13.202.391.496.643.832.979.1126.1315.1504.1693.2134.2239.2344.2491.2680.2785.2974.3163.3310.3499.3604.3793.4024.4045.4066.4087.4108.4129.4150.4171.4192.4276.4297.4318.4339.4360.4423.4444.4507.4549.4591.4633.4864.5032.5347.5410.5557.5599.5767.5956.6355.6607.6649.6691.6712.6733.6754.6901.6985.7006.7027.7342.9190.9211.9232.9253.9274.9295.9316.9568.9589.9652.9694.9715.9946.10030.10135.10198.10462]] } 
+%struct.inp_par.16.205.394.499.646.835.982.1129.1318.1507.1696.2137.2242.2347.2494.2683.2788.2977.3166.3313.3502.3607.3796.4027.4048.4069.4090.4111.4132.4153.4174.4195.4279.4300.4321.4342.4363.4426.4447.4510.4552.4594.4636.4867.5035.5350.5413.5560.5602.5770.5959.6358.6610.6652.6694.6715.6736.6757.6904.6988.7009.7030.7345.9193.9214.9235.9256.9277.9298.9319.9571.9592.9655.9697.9718.9949.10033.10138.10201.10465 = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.macroblock.18.207.396.501.648.837.984.1131.1320.1509.1698.2139.2244.2349.2496.2685.2790.2979.3168.3315.3504.3609.3798.4029.4050.4071.4092.4113.4134.4155.4176.4197.4281.4302.4323.4344.4365.4428.4449.4512.4554.4596.4638.4869.5037.5352.5415.5562.5604.5772.5961.6360.6612.6654.6696.6717.6738.6759.6906.6990.7011.7032.7347.9195.9216.9237.9258.9279.9300.9321.9573.9594.9657.9699.9720.9951.10035.10140.10203.10467 = type { i32, [2 x i32], i32, i32, %struct.macroblock.18.207.396.501.648.837.984.1131.1320.1509.1698.2139.2244.2349.2496.2685.2790.2979.3168.3315.3504.3609.3798.4029.4050.4071.4092.4113.4134.4155.4176.4197.4281.4302.4323.4344.4365.4428.4449.4512.4554.4596.4638.4869.5037.5352.5415.5562.5604.5772.5961.6360.6612.6654.6696.6717.6738.6759.6906.6990.7011.7032.7347.9195.9216.9237.9258.9279.9300.9321.9573.9594.9657.9699.9720.9951.10035.10140.10203.10467*, %struct.macroblock.18.207.396.501.648.837.984.1131.1320.1509.1698.2139.2244.2349.2496.2685.2790.2979.3168.3315.3504.3609.3798.4029.4050.4071.4092.4113.4134.4155.4176.4197.4281.4302.4323.4344.4365.4428.4449.4512.4554.4596.4638.4869.5037.5352.5415.5562.5604.5772.5961.6360.6612.6654.6696.6717.6738.6759.6906.6990.7011.7032.7347.9195.9216.9237.9258.9279.9300.9321.9573.9594.9657.9699.9720.9951.10035.10140.10203.10467*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } 
+%struct.DecRefPicMarking_s.0.189.378.483.630.819.966.1113.1302.1491.1680.2121.2226.2331.2478.2667.2772.2961.3150.3297.3486.3591.3780.4011.4032.4053.4074.4095.4116.4137.4158.4179.4263.4284.4305.4326.4347.4410.4431.4494.4536.4578.4620.4851.5019.5334.5397.5544.5586.5754.5943.6342.6594.6636.6678.6699.6720.6741.6888.6972.6993.7014.7329.9177.9198.9219.9240.9261.9282.9303.9555.9576.9639.9681.9702.9933.10017.10122.10185.10468 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.0.189.378.483.630.819.966.1113.1302.1491.1680.2121.2226.2331.2478.2667.2772.2961.3150.3297.3486.3591.3780.4011.4032.4053.4074.4095.4116.4137.4158.4179.4263.4284.4305.4326.4347.4410.4431.4494.4536.4578.4620.4851.5019.5334.5397.5544.5586.5754.5943.6342.6594.6636.6678.6699.6720.6741.6888.6972.6993.7014.7329.9177.9198.9219.9240.9261.9282.9303.9555.9576.9639.9681.9702.9933.10017.10122.10185.10468* } +%struct.timeb.19.208.397.502.649.838.985.1132.1321.1510.1699.2140.2245.2350.2497.2686.2791.2980.3169.3316.3505.3610.3799.4030.4051.4072.4093.4114.4135.4156.4177.4198.4282.4303.4324.4345.4366.4429.4450.4513.4555.4597.4639.4870.5038.5353.5416.5563.5605.5773.5962.6361.6613.6655.6697.6718.6739.6760.6907.6991.7012.7033.7348.9196.9217.9238.9259.9280.9301.9322.9574.9595.9658.9700.9721.9952.10036.10141.10204.10469 = type { i64, i16, i16, i16 } + +@subblk_offset_y = external local_unnamed_addr constant [3 x [8 x [4 x i8]]], align 16 +@subblk_offset_x = external local_unnamed_addr constant [3 x [8 x [4 x i8]]], align 16 + +; Function Attrs: nounwind uwtable +define void @intrapred_chroma(%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470* nocapture %img) 
local_unnamed_addr #0 { +entry: + %0 = load i32, i32* undef, align 4 + %sub = add nsw i32 %0, -1 + switch i32 undef, label %sw.default [ + i32 0, label %for.cond80.preheader + i32 3, label %if.then422 + i32 1, label %sw.bb709 + i32 2, label %sw.bb774 + ] + +for.cond80.preheader: ; preds = %entry + switch i32 undef, label %for.inc412 [ + i32 0, label %sw.bb + i32 1, label %sw.bb189 + i32 2, label %sw.bb259 + i32 3, label %sw.bb329 + ] + +sw.bb: ; preds = %for.cond80.preheader + unreachable + +sw.bb189: ; preds = %for.cond80.preheader + unreachable + +sw.bb259: ; preds = %for.cond80.preheader + unreachable + +sw.bb329: ; preds = %for.cond80.preheader + unreachable + +for.inc412: ; preds = %for.cond80.preheader + switch i32 undef, label %sw.default [ + i32 3, label %if.then422 + i32 0, label %for.cond653.preheader + i32 1, label %sw.bb709 + i32 2, label %sw.bb774 + ] + +for.cond653.preheader: ; preds = %for.inc412 + %idxprom665 = sext i32 %sub to i64 + %arrayidx668 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_y, i64 0, i64 %idxprom665, i64 0, i64 0 + %1 = add nuw nsw i64 0, 3 + %2 = add nuw nsw i64 0, 3 + br label %for.cond682.preheader + +if.then422: ; preds = %for.inc412, %entry + unreachable + +for.cond682.preheader: ; preds = %for.inc700, %for.cond653.preheader + %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc700 ], [ 0, %for.cond653.preheader ] + br label %for.body686 + +for.body686: ; preds = %for.body686, %for.cond682.preheader + %indvars.iv14 = phi i64 [ %indvars.iv.next15, %for.body686 ], [ 0, %for.cond682.preheader ] + %arrayidx696 = getelementptr inbounds 
%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470, %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470* %img, i64 0, i32 25, i64 %indvars.iv16, i64 %indvars.iv14 + store i16 undef, i16* %arrayidx696, align 2 + %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 + %cmp684 = icmp slt i64 %indvars.iv14, %1 + br i1 %cmp684, label %for.body686, label %for.inc700 + +for.inc700: ; preds = %for.body686 + %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1 + %cmp679 = icmp slt i64 %indvars.iv16, %2 + br i1 %cmp679, label %for.cond682.preheader, label %for.inc703 + +for.inc703: ; preds = %for.inc700 + %arrayidx668.1 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_y, i64 0, i64 %idxprom665, i64 0, i64 1 + %arrayidx675.1 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_x, i64 0, i64 %idxprom665, i64 0, i64 1 + %3 = load i8, i8* %arrayidx675.1, align 1 + %4 = zext i8 %3 to i64 + %5 = add nuw nsw i64 0, 3 + br label %for.cond682.preheader.1 + +sw.bb709: ; preds = %for.inc412, %entry + unreachable + +sw.bb774: ; preds = %for.inc412, %entry + unreachable + +sw.default: ; preds = %for.inc412, %entry + unreachable + +for.cond682.preheader.1: ; preds = %for.inc700.1, %for.inc703 + %indvars.iv16.1 = phi 
i64 [ %indvars.iv.next17.1, %for.inc700.1 ], [ 0, %for.inc703 ] + br label %for.body686.1 + +for.body686.1: ; preds = %for.body686.1, %for.cond682.preheader.1 + %indvars.iv14.1 = phi i64 [ %indvars.iv.next15.1, %for.body686.1 ], [ %4, %for.cond682.preheader.1 ] + %arrayidx696.1 = getelementptr inbounds %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470, %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470* %img, i64 0, i32 25, i64 %indvars.iv16.1, i64 %indvars.iv14.1 + store i16 undef, i16* %arrayidx696.1, align 2 + %indvars.iv.next15.1 = add nuw nsw i64 %indvars.iv14.1, 1 + %cmp684.1 = icmp slt i64 %indvars.iv14.1, 0 + br i1 %cmp684.1, label %for.body686.1, label %for.inc700.1 + +for.inc700.1: ; preds = %for.body686.1 + %indvars.iv.next17.1 = add nuw nsw i64 %indvars.iv16.1, 1 + %cmp679.1 = icmp slt i64 %indvars.iv16.1, %5 + br i1 %cmp679.1, label %for.cond682.preheader.1, label %for.inc703.1 + +for.inc703.1: ; preds = %for.inc700.1 + %arrayidx668.2 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_y, i64 0, i64 %idxprom665, i64 0, i64 2 + %arrayidx675.2 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_x, i64 0, i64 %idxprom665, i64 0, i64 2 + %6 = load i8, i8* %arrayidx675.2, align 2 + %7 
= zext i8 %6 to i64 + br label %for.cond682.preheader.2 + +for.cond682.preheader.2: ; preds = %for.inc703.1 + %arrayidx696.2 = getelementptr inbounds %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470, %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470* %img, i64 0, i32 25, i64 0, i64 %7 + store i16 undef, i16* %arrayidx696.2, align 2 + %arrayidx668.3 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_y, i64 0, i64 %idxprom665, i64 0, i64 3 + %8 = load i8, i8* %arrayidx668.3, align 1 + %arrayidx675.3 = getelementptr inbounds [3 x [8 x [4 x i8]]], [3 x [8 x [4 x i8]]]* @subblk_offset_x, i64 0, i64 %idxprom665, i64 0, i64 3 + %9 = load i8, i8* %arrayidx675.3, align 1 + %10 = zext i8 %9 to i64 + %11 = zext i8 %8 to i64 + %12 = add nuw nsw i64 0, 3 + %13 = add nuw nsw i64 0, 3 + br label %for.cond682.preheader.3 + +for.cond682.preheader.3: ; preds = %for.inc700.3, %for.cond682.preheader.2 + %indvars.iv16.3 = phi i64 [ %indvars.iv.next17.3, %for.inc700.3 ], [ %11, %for.cond682.preheader.2 ] + br label %for.body686.3 + +for.body686.3: ; preds = %for.body686.3, %for.cond682.preheader.3 + %indvars.iv14.3 = phi i64 [ %indvars.iv.next15.3, %for.body686.3 ], [ %10, %for.cond682.preheader.3 ] + %arrayidx696.3 = getelementptr inbounds 
%struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470, %struct.img_par.20.209.398.503.650.839.986.1133.1322.1511.1700.2141.2246.2351.2498.2687.2792.2981.3170.3317.3506.3611.3800.4031.4052.4073.4094.4115.4136.4157.4178.4199.4283.4304.4325.4346.4367.4430.4451.4514.4556.4598.4640.4871.5039.5354.5417.5564.5606.5774.5963.6362.6614.6656.6698.6719.6740.6761.6908.6992.7013.7034.7349.9197.9218.9239.9260.9281.9302.9323.9575.9596.9659.9701.9722.9953.10037.10142.10205.10470* %img, i64 0, i32 25, i64 %indvars.iv16.3, i64 %indvars.iv14.3 + store i16 undef, i16* %arrayidx696.3, align 2 + %indvars.iv.next15.3 = add nuw nsw i64 %indvars.iv14.3, 1 + %cmp684.3 = icmp slt i64 %indvars.iv14.3, %12 + br i1 %cmp684.3, label %for.body686.3, label %for.inc700.3 + +for.inc700.3: ; preds = %for.body686.3 + %indvars.iv.next17.3 = add nuw nsw i64 %indvars.iv16.3, 1 + %cmp679.3 = icmp slt i64 %indvars.iv16.3, %13 + br i1 %cmp679.3, label %for.cond682.preheader.3, label %for.inc703.3 + +for.inc703.3: ; preds = %for.inc700.3 + unreachable +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 277335)"} Index: test/DeLICM/delicm_GVN.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_GVN.ll @@ 
-0,0 +1,51 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) { /* reduction loop */ +; red += 4.2; +; A[j] = red; +; } +; } + +; Nested reduction standard case +; After GVN-Pre (LICM cannot because of possible aliasing) +; Possible difficulties: +; - Slot %arrayidx is overwritten in loop, indicating that %arrayidx is not available for use + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi = phi double [%ld, %parallel.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %add = fadd double %phi, 4.2 + store double %add, double* %arrayidx + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_cond.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_cond.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; if (i % 2 == 0) +; red += 4.2; +; A[j] = red; +; } + +; Body executed conditionally +; Possible difficulties: +; - %reduction.inc non-existing, where to put the store? 
+ +define void @func(i32 %n, double* noalias nonnull %A, i32 %m, double* noalias nonnull %B) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %body], [%i.inc, %body.true] + %phi = phi double [%ld, %parallel.for], [%phi, %body], [%add, %body.true] + %i.cmp = icmp slt i32 %i, %m + %i.inc = add nuw nsw i32 %i, 1 + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %rem = and i32 %i, 1 + %cond = icmp eq i32 %rem, 0 + br i1 %cond, label %body.true, label %reduction.for + +body.true: + %add = fadd double %phi, 4.2 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_conditional_reductions.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_conditional_reductions.ll @@ -0,0 +1,74 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m, int c) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; if (c) { +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; red += 4.2; +; } else { +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; red *= 1.2; +; } +; A[j] = red; +; } + +; Two alternative reduction loops, one of which is selected by the condition on c +; After LICM +; Possible difficulties: +; - Two independent loop-carried phis can reuse the same %arrayidx (to be distinguished from the case where it can't) + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m, i32 %c) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr 
inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %condition, label %return + +condition: + %ccmp = icmp ne i32 %c, 0 + br i1 %ccmp, label %reduction1.for, label %reduction2.for + +reduction1.for: + %i1 = phi i32 [0, %condition], [%i1.inc, %reduction1.inc] + %phi1 = phi double [%ld, %condition], [%add, %reduction1.inc] + %i1.cmp = icmp slt i32 %i1, %m + br i1 %i1.cmp, label %body1, label %parallel.inc + +body1: + %add = fadd double %phi1, 4.2 + br label %reduction1.inc + +reduction1.inc: + %i1.inc = add nuw nsw i32 %i1, 1 + br label %reduction1.for + +reduction2.for: + %i2 = phi i32 [0, %condition], [%i2.inc, %reduction2.inc] + %phi2 = phi double [%ld, %condition], [%mul, %reduction2.inc] + %i2.cmp = icmp slt i32 %i2, %m + br i1 %i2.cmp, label %body2, label %parallel.inc + +body2: + %mul = fmul double %phi2, 1.2 + br label %reduction2.inc + +reduction2.inc: + %i2.inc = add nuw nsw i32 %i2, 1 + br label %reduction2.for + +parallel.inc: + %condphi = phi double [%phi1, %reduction1.for], [%phi2, %reduction2.for] + store double %condphi, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_consecutive_reductions.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_consecutive_reductions.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i1 = 0; i1 < m; i1 += 1) /* reduction loop */ +; red += 4.2; +; for (int i2 = 0; i2 < m; i2 += 1) /* reduction loop */ +; red += 5.3; +; A[j] = red; +; } + +; Two reductions executed sequentially +; Possible difficulties: +; - Two loop-carried phis that can use the same %arrayidx + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { 
+entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction1.for, label %return + +reduction1.for: + %i1 = phi i32 [0, %parallel.for], [%i1.inc, %reduction1.inc] + %phi1 = phi double [%ld, %parallel.for], [%add1, %reduction1.inc] + %i1.cmp = icmp slt i32 %i1, %m + br i1 %i1.cmp, label %body1, label %reduction2.for + +body1: + %add1 = fadd double %phi1, 4.2 + br label %reduction1.inc + +reduction1.inc: + %i1.inc = add nuw nsw i32 %i1, 1 + br label %reduction1.for + +reduction2.for: + %i2 = phi i32 [0, %reduction1.for], [%i2.inc, %reduction2.inc] + %phi2 = phi double [%phi1, %reduction1.for], [%add2, %reduction2.inc] + %i2.cmp = icmp slt i32 %i2, %m + br i1 %i2.cmp, label %body2, label %parallel.inc + +body2: + %add2 = fadd double %phi2, 5.3 + br label %reduction2.inc + +reduction2.inc: + %i2.inc = add nuw nsw i32 %i2, 1 + br label %reduction2.for + +parallel.inc: + store double %phi2, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_loads.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_loads.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m, double B[static const restrict n], double C[static const restrict n]) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; B[j] = A[j]; +; double red = A[j]; +; red += 5.1; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; red += 4.2; +; A[j] = red; +; C[j] = A[j]; +; } + +; Value of A[j] used, outside of reduction +; Possible difficulties: +; - Distinguish from case where uses of A[j] are between the reduction initial load and writeback. 
+ +define void @func(i32 %n, double* noalias nonnull %A, i32 %m, double* noalias nonnull %B, double* noalias nonnull %C) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %arrayidxB = getelementptr inbounds double, double* %B, i32 %j + %arrayidxC = getelementptr inbounds double, double* %C, i32 %j + %ld = load double, double* %arrayidx + %prep = fadd double %ld, 5.1 + store double %ld, double* %arrayidxB + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi = phi double [%prep, %parallel.for], [%add, %reduction.inc] ; reduction is seeded with %prep ("red += 5.1" in the C code), not the raw load + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + store double %phi, double* %arrayidxC + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_nested_reductions.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_nested_reductions.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; for (int k = 0; k < m; k += 1) /* reduction loop */ +; red += 4.2; +; A[j] = red; +; } + +; Two nested loop-carried phis able to reuse the same %arrayidx +; Possible difficulties: +; - Identify that both phis can use the same %arrayidx as scratch space +; - Needs edge-level writes + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for +
+parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction_outer.for, label %return + +reduction_outer.for: + %i_outer = phi i32 [0, %parallel.for], [%i_outer.inc, %reduction_outer.inc] + %phi_outer = phi double [%ld, %parallel.for], [%phi_inner, %reduction_outer.inc] + %i_outer.cmp = icmp slt i32 %i_outer, %m + br i1 %i_outer.cmp, label %reduction_inner.for, label %parallel.inc + +reduction_inner.for: + %i_inner = phi i32 [0, %reduction_outer.for], [%i_inner.inc, %reduction_inner.inc] + %phi_inner = phi double [%phi_outer, %reduction_outer.for], [%add, %reduction_inner.inc] ; inner reduction continues from the outer phi so "red" accumulates across both loops + %i_inner.cmp = icmp slt i32 %i_inner, %m + br i1 %i_inner.cmp, label %body, label %reduction_outer.inc + +body: + %add = fadd double %phi_inner, 4.2 + br label %reduction_inner.inc + +reduction_inner.inc: + %i_inner.inc = add nuw nsw i32 %i_inner, 1 + br label %reduction_inner.for + +reduction_outer.inc: + %i_outer.inc = add nuw nsw i32 %i_outer, 1 + br label %reduction_outer.for + +parallel.inc: + store double %phi_outer, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_nonaffine.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_nonaffine.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; if (i*i == 0) +; red += 4.2; +; A[j] = red; +; } + +; Body is non-affine subregion +; Possible difficulties: +; - Can move through non-affine subregions?
+ +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %body], [%i.inc, %body.true] + %phi = phi double [%ld, %parallel.for], [%phi, %body], [%add, %body.true] + %i.cmp = icmp slt i32 %i, %m + %i.inc = add nuw nsw i32 %i, 1 + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %sqr = mul i32 %i, %i + %cond = icmp eq i32 %sqr, 0 + br i1 %cond, label %body.true, label %reduction.for + +body.true: + %add = fadd double %phi, 4.2 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_reduction.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_reduction.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; red += 4.2; +; A[j] = red; +; } + +; Nested reduction standard case; After LICM +; Possible difficulties: +; - Put the store into %body or %reduction.inc +; - Replace %phi in %body with a load, knowing that after the store has been placed in %body or %reduction.inc, it contains %phi +; - Except the store, all instructions are "rematerializable", when applying the same logic to the loop-carried %phi, so the naive algorithm might try to move all instructions into %parallel.inc and remove the one in %body +; - There can be no mapped store added to parallel.for (for the %phi) because it is not
postdominated by the store + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi = phi double [%ld, %parallel.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_split.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_split.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m, double B[static const restrict m]) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red = A[j]; +; for (int i = 0; i < m; i += 1) { /* reduction loop */ +; red += B[i]; /* assume B[i] non-rematerializable */ +; +; red += B[m-i]; /* assume B[m-i] non-rematerializable */ +; } +; A[j] = red; +; } + +; Body is split into multiple blocks +; Some more complicated code between body and body.split might keep the blocks from being joined into one +; Possible difficulties: +; - Do body and body.split get their own stores? +; - If not, does a scalar dependency between them remain?
+ +define void @func(i32 %n, double* noalias nonnull %A, i32 %m, double* noalias nonnull %B) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi = phi double [%ld, %parallel.for], [%add2, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %arrayidxB1 = getelementptr inbounds double, double* %B, i32 %i + %B1 = load double, double* %arrayidxB1 ; assume non-rematerializable + %add1 = fadd double %phi, %B1 + br label %body.split + +body.split: + %minusi = sub nuw nsw i32 %m, %i + %arrayidxB2 = getelementptr inbounds double, double* %B, i32 %minusi + %B2 = load double, double* %arrayidxB2 ; assume non-rematerializable + %add2 = fadd double %add1, %B2 + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_two_reductions.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_two_reductions.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; double red1 = A[j]; +; double red2 = 0.0; +; for (int i = 0; i < m; i += 1) { /* reduction loop */ +; red1 += 4.2; +; red2 += 2.4; +; } +; A[j] = red1 + red2; +; } + +; Two reductions in the same loop +; Possible difficulties: +; - Cannot use the same %arrayidx for both loop-carried phis; must identify the situation and choose one.
+; - If doing global analysis first, must somehow mark already used %arrayidx to avoid use by other reduction +; - If transforming while doing analysis, analysis for second phi must work on updated MemoryAccesses + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi1 = phi double [%ld, %parallel.for], [%add1, %reduction.inc] + %phi2 = phi double [0.0, %parallel.for], [%add2, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %add1 = fadd double %phi1, 4.2 + %add2 = fadd double %phi2, 2.4 + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + %sum = fadd double %phi1, %phi2 + store double %sum, double* %arrayidx + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_LICM_writes.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_LICM_writes.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; +; void func(int n, double A[static const restrict n], int m, int k) { +; for (int j = 0; j < n; j += 1) { /* parallel loop */ +; A[k] = 2.1; +; double red = A[j]; +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; red += 4.2; +; A[j] = red; +; A[k] = 2.3; +; } + +; %arrayidx possibly overwritten, but not in anything relevant for the reduction +; Possible difficulties: +; - Store of 2.1 might overwrite A[j] if order reversed in %parallel.for (distinguish the two cases) +; - Store of 2.3 might overwrite A[j]; distinguish from case where order
is reversed in %parallel.inc + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m, i32 %k) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %arrayidxk = getelementptr inbounds double, double* %A, i32 %k + store double 2.1, double* %arrayidxk + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %phi = phi double [%ld, %parallel.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label %parallel.inc + +body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + store double %phi, double* %arrayidx + store double 2.3, double* %arrayidxk + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + Index: test/DeLICM/delicm_base.ll =================================================================== --- /dev/null +++ test/DeLICM/delicm_base.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck %s +; +; void func(int n, double A[static const restrict n], int m) { +; for (int j = 0; j < n; j += 1) /* parallel loop */ +; for (int i = 0; i < m; i += 1) /* reduction loop */ +; A[j] += 4.2; +; } +; + +; Nested reduction standard case +; Before DeLICM/LICM-Pre +; Possible difficulties: +; - No changes, code not to be modified + +define void @func(i32 %n, double* noalias nonnull %A, i32 %m) { +entry: + br label %parallel.for + +parallel.for: + %j = phi i32 [0, %entry], [%j.inc, %parallel.inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %reduction.for, label %return + +reduction.for: + %i = phi i32 [0, %parallel.for], [%i.inc, %reduction.inc] + %i.cmp = icmp slt i32 %i, %m + br i1 %i.cmp, label %body, label 
%parallel.inc + +body: + %arrayidx = getelementptr inbounds double, double* %A, i32 %j + %ld = load double, double* %arrayidx + %add = fadd double %ld, 4.2 + store double %add, double* %arrayidx + br label %reduction.inc + +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +parallel.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %parallel.for + +return: + ret void +} + + +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: [n, m] -> { [MemRef_A[i0] -> [i1, i2{{\]\]}} -> [Stmt_body[i0, -1 + m] -> Val_add[{{\]\]}} : m > 0 and 0 <= i0 < n and i1 > i0; [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, -1 + m] -> Val_add[{{\]\]}} : m > 0 and 0 <= i0 < n and i2 > m; [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, -1 + i2] -> Val_add[{{\]\]}} : 0 <= i0 < n and 0 < i2 <= m } + Unknown +; CHECK-NEXT: Written : [n, m] -> { [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, i2] -> Val_add[{{\]\]}} : 0 <= i0 < n and 0 <= i2 < m } +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: [n, m] -> { [MemRef_A[i0] -> [i1, i2{{\]\]}} -> [Stmt_body[i0, -1 + m] -> Val_add[{{\]\]}} : m > 0 and 0 <= i0 < n and i1 > i0; [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, -1 + m] -> Val_add[{{\]\]}} : m > 0 and 0 <= i0 < n and i2 > m; [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, -1 + i2] -> Val_add[{{\]\]}} : 0 <= i0 < n and 0 < i2 <= m } + Unknown +; CHECK-NEXT: Written : [n, m] -> { [MemRef_A[i0] -> [i0, i2{{\]\]}} -> [Stmt_body[i0, i2] -> Val_add[{{\]\]}} : 0 <= i0 < n and 0 <= i2 < m } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n, m] -> { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n, m] -> { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: } Index: test/DeLICM/doubleacc.ll =================================================================== --- 
/dev/null +++ test/DeLICM/doubleacc.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s +; RUN: opt %loadPolly -polly-codegen -S < %s +; +; void foo(long n, float A[n]) { +; for (long i = 0; i < n; i += 1) { +; A[i] += 1; +; A[i] += 1; +; } +; } + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(i32 %n, float* %A) #1 { +entry: + %tmp = sext i32 %n to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %add = fadd float %tmp1, 1.000000e+00 + store float %add, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp2 = load float, float* %arrayidx2, align 4 + %add3 = fadd float %tmp2, 1.000000e+00 + store float %add3, float* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + + + + Index: test/DeLICM/durbin.ll =================================================================== --- /dev/null +++ test/DeLICM/durbin.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/durbin/durbin.c + +; ModuleID = 
'/tmp/bugpoint-91g4tr4u/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-bd5b139.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @kernel_durbin(i32 %n, [4000 x double]* %y, double* %beta) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br i1 undef, label %for.body.lr.ph, label %for.cond94.preheader + +for.body.lr.ph: ; preds = %entry.split + br label %for.body + +for.cond.for.cond94.preheader_crit_edge: ; preds = %for.end84 + br label %for.cond94.preheader + +for.cond94.preheader: ; preds = %for.cond.for.cond94.preheader_crit_edge, %entry.split + br i1 undef, label %for.body96.lr.ph, label %for.end106 + +for.body96.lr.ph: ; preds = %for.cond94.preheader + br label %for.body96 + +for.body: ; preds = %for.end84, %for.body.lr.ph + %indvars.iv15 = phi i64 [ %indvars.iv.next16, %for.end84 ], [ 1, %for.body.lr.ph ] + %indvars.iv11 = phi i32 [ undef, %for.end84 ], [ 1, %for.body.lr.ph ] + %arrayidx19 = getelementptr inbounds double, double* %beta, i64 %indvars.iv15 + store double undef, double* %arrayidx19, align 8 + br i1 true, label %for.body28.lr.ph, label %for.end + +for.body28.lr.ph: ; preds = %for.body + br label %for.body28 + +for.body28: ; preds = %for.body28, %for.body28.lr.ph + %arrayidx41 = getelementptr inbounds [4000 x double], [4000 x double]* %y, i64 0, i64 0 + %0 = load double, double* %arrayidx41, align 8 + %indvars.iv.next2 = add nuw nsw i64 0, 1 + %lftr.wideiv6 = trunc i64 %indvars.iv.next2 to i32 + br i1 false, label %for.body28, label %for.cond25.for.end_crit_edge + +for.cond25.for.end_crit_edge: ; preds = %for.body28 + br label %for.end + +for.end: ; preds = %for.cond25.for.end_crit_edge, %for.body + br i1 true, label %for.body61.lr.ph, label %for.end84 + +for.body61.lr.ph: ; preds = %for.end + br label %for.body61 + +for.body61: ; preds = %for.body61, %for.body61.lr.ph + %indvars.iv.next9 = add 
nuw nsw i64 0, 1 + %lftr.wideiv13 = trunc i64 %indvars.iv.next9 to i32 + br i1 false, label %for.body61, label %for.cond58.for.end84_crit_edge + +for.cond58.for.end84_crit_edge: ; preds = %for.body61 + br label %for.end84 + +for.end84: ; preds = %for.cond58.for.end84_crit_edge, %for.end + %.lcssa = phi double [ undef, %for.cond58.for.end84_crit_edge ], [ undef, %for.end ] + %arrayidx90 = getelementptr inbounds [4000 x double], [4000 x double]* %y, i64 %indvars.iv15, i64 %indvars.iv15 + store double %.lcssa, double* %arrayidx90, align 8 + %indvars.iv.next16 = add nuw nsw i64 %indvars.iv15, 1 + %exitcond22 = icmp ne i32 0, %n + br i1 %exitcond22, label %for.body, label %for.cond.for.cond94.preheader_crit_edge + +for.body96: ; preds = %for.body96, %for.body96.lr.ph + br i1 undef, label %for.body96, label %for.cond94.for.end106_crit_edge + +for.cond94.for.end106_crit_edge: ; preds = %for.body96 + br label %for.end106 + +for.end106: ; preds = %for.cond94.for.end106_crit_edge, %for.cond94.preheader + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 276370)"} Index: test/DeLICM/gemm-before.ll =================================================================== --- /dev/null +++ test/DeLICM/gemm-before.ll @@ -0,0 +1,2873 @@ +; RUN: opt %loadPolly -polly-delicm -polly-ast -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -polly-ast -analyze < %s | FileCheck %s --check-prefix=FLAT + +; RUN: opt %loadPolly -polly-delicm -polly-codegen -S < %s | FileCheck %s --check-prefix=PREIR +; RUN: opt %loadPolly -polly-delicm 
-polly-opt-isl -polly-codegen -S < %s | FileCheck %s --check-prefix=IR +; XFAIL: * +; ModuleID = '/mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/gemm/gemm.c' +source_filename = "/mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/gemm/gemm.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@polybench_papi_counters_threadid = local_unnamed_addr global i32 0, align 4 +@polybench_program_total_flops = local_unnamed_addr global double 0.000000e+00, align 8 +@polybench_t_start = common local_unnamed_addr global double 0.000000e+00, align 8 +@polybench_t_end = common local_unnamed_addr global double 0.000000e+00, align 8 +@.str = private unnamed_addr constant [7 x i8] c"%0.6f\0A\00", align 1 +@polybench_c_start = common local_unnamed_addr global i64 0, align 8 +@polybench_c_end = common local_unnamed_addr global i64 0, align 8 +@stderr = external local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str.1 = private unnamed_addr constant [51 x i8] c"[PolyBench] posix_memalign: cannot allocate memory\00", align 1 + +; Function Attrs: norecurse nounwind readnone uwtable +define void @polybench_flush_cache() local_unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +; Function Attrs: nounwind +declare noalias i8* @calloc(i64, i64) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare void @free(i8* nocapture) local_unnamed_addr #2 + +; Function Attrs: 
argmemonly nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +; Function Attrs: norecurse nounwind readnone uwtable +define void @polybench_prepare_instruments() local_unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + ret void +} + +; Function Attrs: norecurse nounwind uwtable +define void @polybench_timer_start() local_unnamed_addr #3 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + store double 0.000000e+00, double* @polybench_t_start, align 8, !tbaa !1 + ret void +} + +; Function Attrs: norecurse nounwind uwtable +define void @polybench_timer_stop() local_unnamed_addr #3 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + store double 0.000000e+00, double* @polybench_t_end, align 8, !tbaa !1 + ret void +} + +; Function Attrs: nounwind uwtable +define void @polybench_timer_print() local_unnamed_addr #4 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load double, double* @polybench_t_end, align 8, !tbaa !1 + %1 = load double, double* @polybench_t_start, align 8, !tbaa !1 + %sub = fsub double %0, %1 + %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), double %sub) + ret void +} + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #2 + +; Function Attrs: nounwind uwtable +define i8* @polybench_alloc_data(i64 %n, i32 %elt_size) local_unnamed_addr #4 { +entry: + %new.i = alloca i8*, align 8 + br label %entry.split + +entry.split: ; preds = %entry + %conv = sext i32 %elt_size to i64 + %mul = mul i64 %conv, %n + %0 = bitcast i8** %new.i to i8* + call void @llvm.lifetime.start(i64 8, i8* %0) #6 + store i8* null, i8** %new.i, align 8, !tbaa !5 + %call.i = call i32 @posix_memalign(i8** nonnull %new.i, i64 32, i64 %mul) #6 + %1 = load i8*, i8** %new.i, align 8, !tbaa !5 + %tobool.i = icmp eq i8* %1, null + %tobool1.i = icmp ne i32 %call.i, 0 + %or.cond.i = or i1 %tobool1.i, %tobool.i + br i1 %or.cond.i, label %if.then.i, label %xmalloc.exit + +if.then.i: ; preds = %entry.split + %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #7 + call void @exit(i32 1) #8 + unreachable + +xmalloc.exit: ; preds = %entry.split + call void @llvm.lifetime.end(i64 8, i8* %0) #6 + ret i8* %1 +} + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #4 { +entry: + %new.i.i34 = alloca i8*, align 8 + %new.i.i27 = alloca i8*, align 8 + %new.i.i = alloca i8*, align 8 + br label %entry.split + +entry.split: ; preds = %entry + %0 = bitcast i8** %new.i.i to i8* + call void @llvm.lifetime.start(i64 8, i8* %0) #6 + store i8* null, i8** %new.i.i, align 8, !tbaa !5 + %call.i.i = call i32 @posix_memalign(i8** nonnull %new.i.i, i64 32, i64 8388608) #6 + %1 = load i8*, i8** %new.i.i, align 8, !tbaa !5 + %tobool.i.i = icmp eq i8* %1, null + %tobool1.i.i = icmp ne i32 %call.i.i, 0 + %or.cond.i.i = or i1 %tobool1.i.i, %tobool.i.i + br i1 %or.cond.i.i, label %if.then.i.i, label %polybench_alloc_data.exit + +if.then.i.i: ; preds = %entry.split + %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, 
align 8, !tbaa !5 + %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #7 + call void @exit(i32 1) #8 + unreachable + +polybench_alloc_data.exit: ; preds = %entry.split + call void @llvm.lifetime.end(i64 8, i8* %0) #6 + %4 = bitcast i8** %new.i.i27 to i8* + call void @llvm.lifetime.start(i64 8, i8* %4) #6 + store i8* null, i8** %new.i.i27, align 8, !tbaa !5 + %call.i.i28 = call i32 @posix_memalign(i8** nonnull %new.i.i27, i64 32, i64 8388608) #6 + %5 = load i8*, i8** %new.i.i27, align 8, !tbaa !5 + %tobool.i.i29 = icmp eq i8* %5, null + %tobool1.i.i30 = icmp ne i32 %call.i.i28, 0 + %or.cond.i.i31 = or i1 %tobool1.i.i30, %tobool.i.i29 + br i1 %or.cond.i.i31, label %if.then.i.i32, label %polybench_alloc_data.exit33 + +if.then.i.i32: ; preds = %polybench_alloc_data.exit + %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %7 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %6) #7 + call void @exit(i32 1) #8 + unreachable + +polybench_alloc_data.exit33: ; preds = %polybench_alloc_data.exit + call void @llvm.lifetime.end(i64 8, i8* %4) #6 + %8 = bitcast i8** %new.i.i34 to i8* + call void @llvm.lifetime.start(i64 8, i8* %8) #6 + store i8* null, i8** %new.i.i34, align 8, !tbaa !5 + %call.i.i35 = call i32 @posix_memalign(i8** nonnull %new.i.i34, i64 32, i64 8388608) #6 + %9 = load i8*, i8** %new.i.i34, align 8, !tbaa !5 + %tobool.i.i36 = icmp eq i8* %9, null + %tobool1.i.i37 = icmp ne i32 %call.i.i35, 0 + %or.cond.i.i38 = or i1 %tobool1.i.i37, %tobool.i.i36 + br i1 %or.cond.i.i38, label %if.then.i.i39, label %polybench_alloc_data.exit40 + +if.then.i.i39: ; preds = %polybench_alloc_data.exit33 + %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %11 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, 
%struct._IO_FILE* %10) #7 + call void @exit(i32 1) #8 + unreachable + +polybench_alloc_data.exit40: ; preds = %polybench_alloc_data.exit33 + call void @llvm.lifetime.end(i64 8, i8* %8) #6 + %arraydecay = bitcast i8* %1 to [1024 x double]* + %arraydecay3 = bitcast i8* %5 to [1024 x double]* + br label %for.cond1.preheader.i + +for.cond1.preheader.i: ; preds = %for.inc8.i, %polybench_alloc_data.exit40 + %indvars.iv19.i = phi i64 [ 0, %polybench_alloc_data.exit40 ], [ %indvars.iv.next20.i, %for.inc8.i ] + %12 = trunc i64 %indvars.iv19.i to i32 + %conv.i = sitofp i32 %12 to double + br label %for.body3.i + +for.body3.i: ; preds = %for.body3.i, %for.cond1.preheader.i + %indvars.iv16.i = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next17.i, %for.body3.i ] + %13 = trunc i64 %indvars.iv16.i to i32 + %conv4.i = sitofp i32 %13 to double + %mul.i = fmul double %conv.i, %conv4.i + %div.i = fmul double %mul.i, 9.765625e-04 + %arrayidx7.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv19.i, i64 %indvars.iv16.i + store double %div.i, double* %arrayidx7.i, align 8, !tbaa !1 + %indvars.iv.next17.i = add nuw nsw i64 %indvars.iv16.i, 1 + %exitcond18.i = icmp eq i64 %indvars.iv.next17.i, 1024 + br i1 %exitcond18.i, label %for.inc8.i, label %for.body3.i + +for.inc8.i: ; preds = %for.body3.i + %indvars.iv.next20.i = add nuw nsw i64 %indvars.iv19.i, 1 + %exitcond21.i = icmp eq i64 %indvars.iv.next20.i, 1024 + br i1 %exitcond21.i, label %for.cond15.preheader.i.preheader, label %for.cond1.preheader.i + +for.cond15.preheader.i.preheader: ; preds = %for.inc8.i + %arraydecay4 = bitcast i8* %9 to [1024 x double]* + br label %for.cond15.preheader.i + +for.cond15.preheader.i: ; preds = %for.cond15.preheader.i.preheader, %for.inc31.i + %indvars.iv13.i = phi i64 [ %indvars.iv.next14.i, %for.inc31.i ], [ 0, %for.cond15.preheader.i.preheader ] + %14 = trunc i64 %indvars.iv13.i to i32 + %conv19.i = sitofp i32 %14 to double + br label %for.body18.i + 
+for.body18.i: ; preds = %for.body18.i, %for.cond15.preheader.i + %indvars.iv10.i = phi i64 [ 0, %for.cond15.preheader.i ], [ %indvars.iv.next11.i, %for.body18.i ] + %15 = trunc i64 %indvars.iv10.i to i32 + %conv20.i = sitofp i32 %15 to double + %mul21.i = fmul double %conv19.i, %conv20.i + %div23.i = fmul double %mul21.i, 9.765625e-04 + %arrayidx27.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv13.i, i64 %indvars.iv10.i + store double %div23.i, double* %arrayidx27.i, align 8, !tbaa !1 + %indvars.iv.next11.i = add nuw nsw i64 %indvars.iv10.i, 1 + %exitcond12.i = icmp eq i64 %indvars.iv.next11.i, 1024 + br i1 %exitcond12.i, label %for.inc31.i, label %for.body18.i + +for.inc31.i: ; preds = %for.body18.i + %indvars.iv.next14.i = add nuw nsw i64 %indvars.iv13.i, 1 + %exitcond15.i = icmp eq i64 %indvars.iv.next14.i, 1024 + br i1 %exitcond15.i, label %for.cond38.preheader.i, label %for.cond15.preheader.i + +for.cond38.preheader.i: ; preds = %for.inc31.i, %for.inc54.i + %indvars.iv7.i = phi i64 [ %indvars.iv.next8.i, %for.inc54.i ], [ 0, %for.inc31.i ] + %16 = trunc i64 %indvars.iv7.i to i32 + %conv42.i = sitofp i32 %16 to double + br label %for.body41.i + +for.body41.i: ; preds = %for.body41.i, %for.cond38.preheader.i + %indvars.iv.i = phi i64 [ 0, %for.cond38.preheader.i ], [ %indvars.iv.next.i, %for.body41.i ] + %17 = trunc i64 %indvars.iv.i to i32 + %conv43.i = sitofp i32 %17 to double + %mul44.i = fmul double %conv42.i, %conv43.i + %div46.i = fmul double %mul44.i, 9.765625e-04 + %arrayidx50.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv7.i, i64 %indvars.iv.i + store double %div46.i, double* %arrayidx50.i, align 8, !tbaa !1 + %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 + %exitcond.i = icmp eq i64 %indvars.iv.next.i, 1024 + br i1 %exitcond.i, label %for.inc54.i, label %for.body41.i + +for.inc54.i: ; preds = %for.body41.i + %indvars.iv.next8.i = add nuw nsw i64 
%indvars.iv7.i, 1 + %exitcond9.i = icmp eq i64 %indvars.iv.next8.i, 1024 + br i1 %exitcond9.i, label %for.cond1.preheader.i42, label %for.cond38.preheader.i + +for.cond1.preheader.i42: ; preds = %for.inc54.i, %for.inc26.i + %indvars.iv7.i41 = phi i64 [ %indvars.iv.next8.i48, %for.inc26.i ], [ 0, %for.inc54.i ] + br label %for.body3.i44 + +for.body3.i44: ; preds = %for.inc23.i, %for.cond1.preheader.i42 + %indvars.iv4.i = phi i64 [ 0, %for.cond1.preheader.i42 ], [ %indvars.iv.next5.i, %for.inc23.i ] + %arrayidx5.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv7.i41, i64 %indvars.iv4.i + %18 = load double, double* %arrayidx5.i, align 8, !tbaa !1 + %mul.i43 = fmul double %18, 2.123000e+03 + store double %mul.i43, double* %arrayidx5.i, align 8, !tbaa !1 + br label %for.body8.i + +for.body8.i: ; preds = %for.body8.i, %for.body3.i44 + %19 = phi double [ %mul.i43, %for.body3.i44 ], [ %add.i, %for.body8.i ] + %indvars.iv.i45 = phi i64 [ 0, %for.body3.i44 ], [ %indvars.iv.next.i46, %for.body8.i ] + %arrayidx12.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv7.i41, i64 %indvars.iv.i45 + %20 = load double, double* %arrayidx12.i, align 8, !tbaa !1 + %mul13.i = fmul double %20, 3.241200e+04 + %arrayidx17.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv.i45, i64 %indvars.iv4.i + %21 = load double, double* %arrayidx17.i, align 8, !tbaa !1 + %mul18.i = fmul double %mul13.i, %21 + %add.i = fadd double %19, %mul18.i + store double %add.i, double* %arrayidx5.i, align 8, !tbaa !1 + %indvars.iv.next.i46 = add nuw nsw i64 %indvars.iv.i45, 1 + %exitcond.i47 = icmp eq i64 %indvars.iv.next.i46, 1024 + br i1 %exitcond.i47, label %for.inc23.i, label %for.body8.i + +for.inc23.i: ; preds = %for.body8.i + %indvars.iv.next5.i = add nuw nsw i64 %indvars.iv4.i, 1 + %exitcond6.i = icmp eq i64 %indvars.iv.next5.i, 1024 + br i1 %exitcond6.i, label %for.inc26.i, label 
%for.body3.i44 + +for.inc26.i: ; preds = %for.inc23.i + %indvars.iv.next8.i48 = add nuw nsw i64 %indvars.iv7.i41, 1 + %exitcond9.i49 = icmp eq i64 %indvars.iv.next8.i48, 1024 + br i1 %exitcond9.i49, label %kernel_gemm.exit, label %for.cond1.preheader.i42 + +kernel_gemm.exit: ; preds = %for.inc26.i + %call.i = call noalias i8* @malloc(i64 16385) #6 + %arrayidx.i = getelementptr inbounds i8, i8* %call.i, i64 16384 + store i8 0, i8* %arrayidx.i, align 1, !tbaa !7 + br label %for.cond3.preheader.i + +for.cond3.preheader.i: ; preds = %for.end.i, %kernel_gemm.exit + %indvars.iv4.i50 = phi i64 [ 0, %kernel_gemm.exit ], [ %indvars.iv.next5.i54, %for.end.i ] + br label %for.body6.i + +for.body6.i: ; preds = %for.body6.i, %for.cond3.preheader.i + %indvars.iv.i51 = phi i64 [ 0, %for.cond3.preheader.i ], [ %indvars.iv.next.i52, %for.body6.i ] + %arrayidx10.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv4.i50, i64 %indvars.iv.i51 + %22 = bitcast double* %arrayidx10.i to i64* + %23 = load i64, i64* %22, align 8, !tbaa !1 + %24 = shl nsw i64 %indvars.iv.i51, 4 + %block.sroa.0.0.extract.trunc138.i.i = trunc i64 %23 to i8 + %and.i.i = and i8 %block.sroa.0.0.extract.trunc138.i.i, 15 + %add.i.i = or i8 %and.i.i, 48 + %add.ptr.i.i = getelementptr inbounds i8, i8* %call.i, i64 %24 + store i8 %add.i.i, i8* %add.ptr.i.i, align 1, !tbaa !7 + %add.ptr10.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 1 + store i8 %add.i.i, i8* %add.ptr10.i.i, align 1, !tbaa !7 + %block.sroa.0.1.extract.shift.i.i = lshr i64 %23, 8 + %conv13195.i.i = trunc i64 %block.sroa.0.1.extract.shift.i.i to i8 + %and14.i.i = and i8 %conv13195.i.i, 15 + %add15.i.i = or i8 %and14.i.i, 48 + %add.ptr19.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 2 + store i8 %add15.i.i, i8* %add.ptr19.i.i, align 1, !tbaa !7 + %add.ptr28.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 3 + store i8 %add15.i.i, i8* %add.ptr28.i.i, align 1, !tbaa !7 + 
%block.sroa.0.2.extract.shift.i.i = lshr i64 %23, 16 + %conv31201.i.i = trunc i64 %block.sroa.0.2.extract.shift.i.i to i8 + %and32.i.i = and i8 %conv31201.i.i, 15 + %add33.i.i = or i8 %and32.i.i, 48 + %add.ptr37.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 4 + store i8 %add33.i.i, i8* %add.ptr37.i.i, align 1, !tbaa !7 + %add.ptr46.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 5 + store i8 %add33.i.i, i8* %add.ptr46.i.i, align 1, !tbaa !7 + %.tr.i.i = trunc i64 %23 to i32 + %sext204207.i.i = lshr i32 %.tr.i.i, 24 + %and50.i.i = and i32 %sext204207.i.i, 15 + %add51.i.i = or i32 %and50.i.i, 48 + %conv52.i.i = trunc i32 %add51.i.i to i8 + %add.ptr55.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 6 + store i8 %conv52.i.i, i8* %add.ptr55.i.i, align 1, !tbaa !7 + %add.ptr64.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 7 + store i8 %conv52.i.i, i8* %add.ptr64.i.i, align 1, !tbaa !7 + %block.sroa.0.4.extract.shift.i.i = lshr i64 %23, 32 + %conv67211.i.i = trunc i64 %block.sroa.0.4.extract.shift.i.i to i8 + %and68.i.i = and i8 %conv67211.i.i, 15 + %add69.i.i = or i8 %and68.i.i, 48 + %add.ptr73.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 8 + store i8 %add69.i.i, i8* %add.ptr73.i.i, align 1, !tbaa !7 + %add.ptr82.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 9 + store i8 %add69.i.i, i8* %add.ptr82.i.i, align 1, !tbaa !7 + %block.sroa.0.5.extract.shift.i.i = lshr i64 %23, 40 + %conv85217.i.i = trunc i64 %block.sroa.0.5.extract.shift.i.i to i8 + %and86.i.i = and i8 %conv85217.i.i, 15 + %add87.i.i = or i8 %and86.i.i, 48 + %add.ptr91.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 10 + store i8 %add87.i.i, i8* %add.ptr91.i.i, align 1, !tbaa !7 + %add.ptr100.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 11 + store i8 %add87.i.i, i8* %add.ptr100.i.i, align 1, !tbaa !7 + %block.sroa.0.6.extract.shift.i.i = lshr i64 %23, 48 + %conv103223.i.i = trunc i64 %block.sroa.0.6.extract.shift.i.i to i8 + %and104.i.i = and 
i8 %conv103223.i.i, 15 + %add105.i.i = or i8 %and104.i.i, 48 + %add.ptr109.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 12 + store i8 %add105.i.i, i8* %add.ptr109.i.i, align 1, !tbaa !7 + %add.ptr118.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 13 + store i8 %add105.i.i, i8* %add.ptr118.i.i, align 1, !tbaa !7 + %block.sroa.0.7.extract.shift.i.i = lshr i64 %23, 56 + %conv121229.i.i = trunc i64 %block.sroa.0.7.extract.shift.i.i to i8 + %and122.i.i = and i8 %conv121229.i.i, 15 + %add123.i.i = or i8 %and122.i.i, 48 + %add.ptr127.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 14 + store i8 %add123.i.i, i8* %add.ptr127.i.i, align 1, !tbaa !7 + %add.ptr136.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 15 + store i8 %add123.i.i, i8* %add.ptr136.i.i, align 1, !tbaa !7 + %indvars.iv.next.i52 = add nuw nsw i64 %indvars.iv.i51, 1 + %exitcond.i53 = icmp eq i64 %indvars.iv.next.i52, 1024 + br i1 %exitcond.i53, label %for.end.i, label %for.body6.i + +for.end.i: ; preds = %for.body6.i + %25 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 + %call12.i = call i32 @fputs(i8* nonnull %call.i, %struct._IO_FILE* %25) #7 + %indvars.iv.next5.i54 = add nuw nsw i64 %indvars.iv4.i50, 1 + %exitcond6.i55 = icmp eq i64 %indvars.iv.next5.i54, 1024 + br i1 %exitcond6.i55, label %print_array.exit, label %for.cond3.preheader.i + +print_array.exit: ; preds = %for.end.i + call void @free(i8* nonnull %call.i) #6 + call void @free(i8* nonnull %1) #6 + call void @free(i8* %5) #6 + call void @free(i8* %9) #6 + ret i32 0 +} + +; Function Attrs: nounwind +declare i32 @posix_memalign(i8**, i64, i64) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) 
local_unnamed_addr #2 + +; Function Attrs: noreturn nounwind +declare void @exit(i32) local_unnamed_addr #5 + +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #6 + +attributes #0 = { norecurse nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { nounwind } +attributes #7 = { cold nounwind } +attributes #8 = { noreturn nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"double", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !3, i64 0} +!7 = !{!3, !3, i64 0} + + +; CHECK: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_flush_cache': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_flush_cache': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_prepare_instruments': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_prepare_instruments': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_start': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_start': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_stop': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_stop': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_print': +; CHECK-NEXT: 
Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_print': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_alloc_data': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_alloc_data': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body8.i => for.inc23.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body8.i => for.inc23.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body3.i44 => for.inc26.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body3.i44 => for.inc26.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i42 => kernel_gemm.exit' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i42 => kernel_gemm.exit' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body41.i => for.inc54.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body41.i => for.inc54.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond38.preheader.i => for.cond1.preheader.i42' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond38.preheader.i => for.cond1.preheader.i42' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body18.i => for.inc31.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body18.i => for.inc31.i' in 
function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond15.preheader.i => for.cond38.preheader.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond15.preheader.i => for.cond38.preheader.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body3.i => for.inc8.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body3.i => for.inc8.i' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i => for.cond15.preheader.i.preheader' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i => for.cond15.preheader.i.preheader' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i => kernel_gemm.exit' in function 'main': +; CHECK-NEXT: Original Zone { +; CHECK-NEXT: Lifetime: { [MemRef_9[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 1; [MemRef_9[i0, i1] -> [2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_9[i0, i1] -> [2, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_9[i0, i1] -> [2, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_9[i0, i1] -> [2, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_9[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 3; [MemRef_9[i0, i1] -> [2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_9[i0, i1] -> [2, i0, i4, i5, 
i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_9[i0, i1] -> [2, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_9[i0, i1] -> [2, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i2 <= 2; [MemRef_1[i0, i1] -> [3, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_1[i0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_1[i0, i1] -> [3, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 < i1; [MemRef_1[i0, i1] -> [0, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_1[i0, i1] -> [0, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_1[i0, i1] -> [3, i0, i1, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < 0; [MemRef_1[i0, i1] -> [0, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [3, i0, i1, 0, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 
and 0 <= i1 <= 1023 and i2 >= 4; [MemRef_1[i0, i1] -> [3, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_1[i0, i1] -> [3, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 > i1; [MemRef_1[i0, i1] -> [3, i0, i1, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 >= 2; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 >= 1024; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -1 + i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i6 <= 1023; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 < 0; [MemRef_1[i0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_1[i0, i1] -> [0, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_1[i0, i1] -> [0, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_1[i0, i1] -> [0, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_5[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 2; [MemRef_5[i0, i1] -> [1, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_5[i0, i1] -> [1, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_5[i0, i1] -> [1, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> 
Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_5[i0, i1] -> [1, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_1[i0, i1] -> [3, i0, i1, 0, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_5[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 0; [MemRef_5[i0, i1] -> [1, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_5[i0, i1] -> [1, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_5[i0, i1] -> [1, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_5[i0, i1] -> [1, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0 } +; CHECK-NEXT: Written: { [MemRef_5[i0, i1] -> [1, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i6 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 0, 0{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_9[i0, i1] -> [2, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [0, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 } +; CHECK-NEXT: } +; CHECK-NEXT: Mapped {{0x[0-9a-fA-F]+}}: +; CHECK-NEXT: Accesses: 3 +; CHECK-NEXT: Target: { 
Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 } +; CHECK-NEXT: Lifetime: { Stmt_for_body8_i[i0, i1, 0] -> [3, i0, i1, 1, o4] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and o4 <= 0; Stmt_for_body8_i[i0, i1, i2] -> [3, i0, i1, 1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i2 <= 1023; Stmt_for_body8_i[i0, i1, 0] -> [3, i0, i1, 0, o4] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and o4 > 0 } +; CHECK-NEXT: Zone { +; CHECK-NEXT: Lifetime: { [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -1 + i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i6 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_1[i0, i1] -> [3, i0, i1, 0, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0 } +; CHECK-NEXT: Written: { [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i6 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 0, 0{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 } +; CHECK-NEXT: } +; CHECK-NEXT: After Zone { +; CHECK-NEXT: Lifetime: { [MemRef_9[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 1; [MemRef_9[i0, i1] -> [2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_9[i0, i1] -> [2, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_9[i0, i1] -> [2, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_9[i0, i1] -> [2, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_9[i0, i1] -> [i2, i3, i4, i5, 
i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 3; [MemRef_9[i0, i1] -> [2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_9[i0, i1] -> [2, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_9[i0, i1] -> [2, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_9[i0, i1] -> [2, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i2 <= 2; [MemRef_1[i0, i1] -> [3, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_1[i0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_1[i0, i1] -> [3, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 < i1; [MemRef_1[i0, i1] -> [0, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_1[i0, i1] -> [0, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_1[i0, i1] -> [3, i0, i1, i5, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < 0; [MemRef_1[i0, i1] -> [0, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 
and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [3, i0, i1, 0, i6{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 4; [MemRef_1[i0, i1] -> [3, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_1[i0, i1] -> [3, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 > i1; [MemRef_1[i0, i1] -> [3, i0, i1, i5, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 >= 2; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 >= 1024; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -1 + i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 < i6 <= 1023; [MemRef_1[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 < 0; [MemRef_1[i0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_1[i0, i1] -> [0, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_1[i0, i1] -> [0, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_1[i0, i1] -> [0, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_5[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 2; [MemRef_5[i0, i1] -> [1, i3, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> 
Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 > i0; [MemRef_5[i0, i1] -> [1, i0, i4, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 >= 2; [MemRef_5[i0, i1] -> [1, i0, 1, i5, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 > i1; [MemRef_5[i0, i1] -> [1, i0, 1, i1, i6{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0; [MemRef_1[i0, i1] -> [3, i0, i1, 0, i6{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 > 0; [MemRef_5[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 0; [MemRef_5[i0, i1] -> [1, i3, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i3 < i0; [MemRef_5[i0, i1] -> [1, i0, i4, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i4 <= 0; [MemRef_5[i0, i1] -> [1, i0, 1, i5, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i5 < i1; [MemRef_5[i0, i1] -> [1, i0, 1, i1, i6{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i6 <= 0 } +; CHECK-NEXT: Written: { [MemRef_5[i0, i1] -> [1, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 1, i6{{\]\]}} -> [Stmt_for_body8_i[i0, i1, i6] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i6 <= 1023; [MemRef_1[i0, i1] -> [3, i0, i1, 0, 0{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_9[i0, i1] -> [2, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> 
Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [0, i0, 1, i1, 0{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 } +; CHECK-NEXT: } +; CHECK-NEXT: After Statements { +; CHECK-NEXT: Stmt_for_cond1_preheader_i +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_cond1_preheader_i[i0] -> MemRef_conv_i[] }; +; CHECK-NEXT: Stmt_for_body3_i +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body3_i[i0, i1] -> MemRef_conv_i[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3_i[i0, i1] -> MemRef_1[i0, i1] }; +; CHECK-NEXT: Stmt_for_cond15_preheader_i +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_cond15_preheader_i[i0] -> MemRef_conv19_i[] }; +; CHECK-NEXT: Stmt_for_body18_i +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body18_i[i0, i1] -> MemRef_conv19_i[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body18_i[i0, i1] -> MemRef_5[i0, i1] }; +; CHECK-NEXT: Stmt_for_cond38_preheader_i +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_cond38_preheader_i[i0] -> MemRef_conv42_i[] }; +; CHECK-NEXT: Stmt_for_body41_i +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body41_i[i0, i1] -> MemRef_conv42_i[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body41_i[i0, i1] -> MemRef_9[i0, i1] }; +; CHECK-NEXT: Stmt_for_body3_i44 +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3_i44[i0, i1] -> MemRef_1[i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3_i44[i0, i1] -> 
MemRef_1[i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body3_i44[i0, i1] -> MemRef_19__phi[] }; +; CHECK-NEXT: new: { Stmt_for_body3_i44[i0, i1] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }; +; CHECK-NEXT: Stmt_for_body8_i +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_19__phi[] }; +; CHECK-NEXT: new: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_19__phi[] }; +; CHECK-NEXT: new: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_5[i0, i2] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_9[i2, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] }; +; CHECK-NEXT: } +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i => kernel_gemm.exit' in function 'main': +; CHECK-NEXT: :: isl ast :: main :: %for.cond1.preheader.i---%kernel_gemm.exit +; +; CHECK: if (1 && (&MemRef_9[1023][1024] <= &MemRef_5[0][0] || &MemRef_5[1023][1024] <= &MemRef_9[0][0]) && (&MemRef_1[1023][1024] <= &MemRef_5[0][0] || &MemRef_5[1023][1024] <= &MemRef_1[0][0]) && (&MemRef_1[1023][1024] <= &MemRef_9[0][0] || &MemRef_9[1023][1024] <= &MemRef_1[0][0])) +; +; CHECK: { +; CHECK-NEXT: for (int c0 = 0; c0 <= 1023; c0 += 1) { +; CHECK-NEXT: Stmt_for_cond1_preheader_i(c0); +; CHECK-NEXT: for (int c1 = 0; c1 <= 1023; c1 += 1) +; CHECK-NEXT: Stmt_for_body3_i(c0, c1); +; 
CHECK-NEXT: } +; CHECK-NEXT: for (int c0 = 0; c0 <= 1023; c0 += 1) { +; CHECK-NEXT: Stmt_for_cond15_preheader_i(c0); +; CHECK-NEXT: for (int c1 = 0; c1 <= 1023; c1 += 1) +; CHECK-NEXT: Stmt_for_body18_i(c0, c1); +; CHECK-NEXT: } +; CHECK-NEXT: for (int c0 = 0; c0 <= 1023; c0 += 1) { +; CHECK-NEXT: Stmt_for_cond38_preheader_i(c0); +; CHECK-NEXT: for (int c1 = 0; c1 <= 1023; c1 += 1) +; CHECK-NEXT: Stmt_for_body41_i(c0, c1); +; CHECK-NEXT: } +; CHECK-NEXT: for (int c0 = 0; c0 <= 1023; c0 += 1) +; CHECK-NEXT: for (int c1 = 0; c1 <= 1023; c1 += 1) { +; CHECK-NEXT: Stmt_for_body3_i44(c0, c1); +; CHECK-NEXT: for (int c2 = 0; c2 <= 1023; c2 += 1) +; CHECK-NEXT: Stmt_for_body8_i(c0, c1, c2); +; CHECK-NEXT: } +; CHECK-NEXT: } +; +; CHECK: else +; CHECK-NEXT: { /* original code */ } +; +; CHECK: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body6.i => for.end.i' in function 'main': +; CHECK-NEXT: Original Zone { +; CHECK-NEXT: Lifetime: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -15 + i0 <= 16o0 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -1 + i0 <= 16o0 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -3 + i0 <= 16o0 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -7 + i0 <= 16o0 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -5 + i0 <= 16o0 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -13 + i0 <= 16o0 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -9 + 
i0 <= 16o0 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -11 + i0 <= 16o0 <= -10 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> Undef[] : -14 <= i0 <= 16383 and 16i1 <= 14 + i0 and ((16*floor((2 + i0)/16) = 2 + i0 and 14 <= i0 <= 16382 and 16i1 <= -14 + i0) or (-2 <= i0 <= 16381 and 16i1 <= 2 + i0 and 16*floor((2 + i0)/16) <= -12 + i0) or (-5 <= i0 <= 16378 and 16i1 <= 5 + i0 and 16*floor((5 + i0)/16) <= -9 + i0) or (-8 <= i0 <= 16375 and 16i1 <= 8 + i0 and 16*floor((8 + i0)/16) <= -6 + i0) or (-11 <= i0 <= 16372 and 16i1 <= 11 + i0 and 16*floor((11 + i0)/16) <= -3 + i0) or (i0 <= 16369 and 16*floor((14 + i0)/16) <= i0) or (16*floor((14 + i0)/16) = 14 + i0 and 2 <= i0 <= 16370 and 16i1 <= -2 + i0) or (16*floor((11 + i0)/16) = 11 + i0 and 5 <= i0 <= 16373 and 16i1 <= -5 + i0) or (16*floor((8 + i0)/16) = 8 + i0 and 8 <= i0 <= 16376 and 16i1 <= -8 + i0) or (16*floor((5 + i0)/16) = 5 + i0 and 11 <= i0 <= 16379 and 16i1 <= -11 + i0) or (16*floor((-15 + i0)/16) = -15 + i0 and i0 >= 15 and 16i1 <= -15 + i0)) } +; CHECK-NEXT: Written: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -15 + i0 <= 16i1 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -7 + i0 <= 16i1 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -1 + i0 <= 16i1 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -5 + i0 <= 16i1 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -9 + i0 <= 16i1 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -13 + i0 <= 16i1 <= -12 + i0; [MemRef_call_i[i0] -> 
[i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -3 + i0 <= 16i1 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -11 + i0 <= 16i1 <= -10 + i0 } +; CHECK-NEXT: } +; CHECK-NEXT: After Zone { +; CHECK-NEXT: Lifetime: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -15 + i0 <= 16o0 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -1 + i0 <= 16o0 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -3 + i0 <= 16o0 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -7 + i0 <= 16o0 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -5 + i0 <= 16o0 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -13 + i0 <= 16o0 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -9 + i0 <= 16o0 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -11 + i0 <= 16o0 <= -10 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> Undef[] : -14 <= i0 <= 16383 and 16i1 <= 14 + i0 and ((16*floor((2 + i0)/16) = 2 + i0 and 14 <= i0 <= 16382 and 16i1 <= -14 + i0) or (-2 <= i0 <= 16381 and 16i1 <= 2 + i0 and 16*floor((2 + i0)/16) <= -12 + i0) or (-5 <= i0 <= 16378 and 16i1 <= 5 + i0 and 16*floor((5 + i0)/16) <= -9 + i0) or (-8 <= i0 <= 16375 and 16i1 <= 8 + i0 and 16*floor((8 + i0)/16) <= -6 + i0) or (-11 <= i0 <= 16372 and 16i1 <= 11 + i0 
and 16*floor((11 + i0)/16) <= -3 + i0) or (i0 <= 16369 and 16*floor((14 + i0)/16) <= i0) or (16*floor((14 + i0)/16) = 14 + i0 and 2 <= i0 <= 16370 and 16i1 <= -2 + i0) or (16*floor((11 + i0)/16) = 11 + i0 and 5 <= i0 <= 16373 and 16i1 <= -5 + i0) or (16*floor((8 + i0)/16) = 8 + i0 and 8 <= i0 <= 16376 and 16i1 <= -8 + i0) or (16*floor((5 + i0)/16) = 5 + i0 and 11 <= i0 <= 16379 and 16i1 <= -11 + i0) or (16*floor((-15 + i0)/16) = -15 + i0 and i0 >= 15 and 16i1 <= -15 + i0)) } +; CHECK-NEXT: Written: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -15 + i0 <= 16i1 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -7 + i0 <= 16i1 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -1 + i0 <= 16i1 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -5 + i0 <= 16i1 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -9 + i0 <= 16i1 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -13 + i0 <= 16i1 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -3 + i0 <= 16i1 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -11 + i0 <= 16i1 <= -10 + i0 } +; CHECK-NEXT: } +; CHECK-NEXT: After Statements { +; CHECK-NEXT: Stmt_for_body6_i +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_1[p_0, i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[16i0] }; +; 
CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[1 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[2 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[3 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[4 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[5 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[6 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[7 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[8 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[9 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[10 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[11 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[12 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[13 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { 
Stmt_for_body6_i[i0] -> MemRef_call_i[14 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[15 + 16i0] }; +; CHECK-NEXT: } +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body6.i => for.end.i' in function 'main': +; CHECK-NEXT: :: isl ast :: main :: %for.body6.i---%for.end.i +; +; CHECK: if (1) +; +; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1) +; CHECK-NEXT: Stmt_for_body6_i(c0); +; +; CHECK: else +; CHECK-NEXT: { /* original code */ } +; +; CHECK: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond3.preheader.i => print_array.exit' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond3.preheader.i => print_array.exit' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'main': +; CHECK-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'main': + +; FLAT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_flush_cache': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_flush_cache': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_flush_cache': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_prepare_instruments': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_prepare_instruments': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_prepare_instruments': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_timer_start': +; FLAT-NEXT: 
Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_start': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_start': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_timer_stop': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_stop': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_stop': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_timer_print': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_timer_print': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_timer_print': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'polybench_alloc_data': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'polybench_alloc_data': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'polybench_alloc_data': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body8.i => for.inc23.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body8.i => for.inc23.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body8.i => for.inc23.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body3.i44 => for.inc26.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body3.i44 => for.inc26.i' in function 'main': +; FLAT-NEXT: Printing 
analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body3.i44 => for.inc26.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond1.preheader.i42 => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i42 => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i42 => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body41.i => for.inc54.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body41.i => for.inc54.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body41.i => for.inc54.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond38.preheader.i => for.cond1.preheader.i42' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond38.preheader.i => for.cond1.preheader.i42' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond38.preheader.i => for.cond1.preheader.i42' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body18.i => for.inc31.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body18.i => for.inc31.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body18.i => for.inc31.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond15.preheader.i => for.cond38.preheader.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond15.preheader.i => 
for.cond38.preheader.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond15.preheader.i => for.cond38.preheader.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body3.i => for.inc8.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body3.i => for.inc8.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body3.i => for.inc8.i' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond1.preheader.i => for.cond15.preheader.i.preheader' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i => for.cond15.preheader.i.preheader' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i => for.cond15.preheader.i.preheader' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond1.preheader.i => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: Schedule before flattening { +; FLAT-NEXT: { Stmt_for_cond15_preheader_i[i0] -> [1, i0, 0, 0, 0] } +; FLAT-NEXT: { Stmt_for_cond1_preheader_i[i0] -> [0, i0, 0, 0, 0] } +; FLAT-NEXT: { Stmt_for_cond38_preheader_i[i0] -> [2, i0, 0, 0, 0] } +; FLAT-NEXT: { Stmt_for_body3_i44[i0, i1] -> [3, i0, i1, 0, 0] } +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> [3, i0, i1, 1, i2] } +; FLAT-NEXT: { Stmt_for_body41_i[i0, i1] -> [2, i0, 1, i1, 0] } +; FLAT-NEXT: { Stmt_for_body3_i[i0, i1] -> [0, i0, 1, i1, 0] } +; FLAT-NEXT: { Stmt_for_body18_i[i0, i1] -> [1, i0, 1, i1, 0] } +; FLAT-NEXT: } +; +; FLAT: Schedule after flattening { +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> [3148801 + 1049600i0 + 1025i1 + i2] } +; FLAT-NEXT: { Stmt_for_body3_i44[i0, i1] -> [3148800 + 1049600i0 + 1025i1] } +; FLAT-NEXT: { 
Stmt_for_cond15_preheader_i[i0] -> [1049600 + 1025i0] } +; FLAT-NEXT: { Stmt_for_body3_i[i0, i1] -> [1 + 1025i0 + i1] } +; FLAT-NEXT: { Stmt_for_body41_i[i0, i1] -> [2099201 + 1025i0 + i1] } +; FLAT-NEXT: { Stmt_for_body18_i[i0, i1] -> [1049601 + 1025i0 + i1] } +; FLAT-NEXT: { Stmt_for_cond1_preheader_i[i0] -> [1025i0] } +; FLAT-NEXT: { Stmt_for_cond38_preheader_i[i0] -> [2099200 + 1025i0] } +; FLAT-NEXT: } +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond1.preheader.i => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: Original Zone { +; FLAT-NEXT: Lifetime: { [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 2 + 1025i0 + i1 <= i2 <= 3148800 + 1049600i0 + 1025i1; [MemRef_5[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 1049602 + 1025i0 + i1; [MemRef_9[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 2099201 + 1025i0 + i1; [MemRef_1[i0, i1] -> [3148801 + 1049600i0 + 1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 3149826 + 1049600i0 + 1025i1; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148802 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 3148802 + 1049600i0 + 1025i1 <= i2 <= 3149825 + 1049600i0 + 1025i1; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 1 + 1025i0 + i1; [MemRef_9[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 2099202 + 1025i0 + i1; [MemRef_5[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 
and i2 <= 1049601 + 1025i0 + i1 } +; FLAT-NEXT: Written: { [MemRef_1[i0, i1] -> [1 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [3148800 + 1049600i0 + 1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_5[i0, i1] -> [1049601 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_9[i0, i1] -> [2099201 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148801 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 3148801 + 1049600i0 + 1025i1 <= i2 <= 3149824 + 1049600i0 + 1025i1 } +; FLAT-NEXT: } +; FLAT-NEXT: Mapped 0x{{[0-9a-fA-F]+}}: +; FLAT-NEXT: Accesses: 3 +; FLAT-NEXT: Target: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 0 and -1 - 1049600i0 - 1025i1 <= i2 <= 1074790398 - 1049600i0 - 1025i1 and i2 <= 1023 } +; FLAT-NEXT: Lifetime: { Stmt_for_body8_i[i0, i1, i2] -> [3148801 + 1049600i0 + 1025i1 + i2] : i0 >= 0 and i1 >= 0 and 0 <= i2 <= 1074789375 - 1049600i0 - 1025i1 and i2 <= 1048575 - 1025i1 and i2 <= 1023; Stmt_for_body8_i[i0, 1023, i2] -> [4197376 + 1049600i0 + i2] : i2 > 0 and -1048574 - 1049600i0 <= i2 <= 1073740800 - 1049600i0 and i2 <= 1023; Stmt_for_body8_i[1023, 1023, i2] -> [1077938176 + i2] : 0 < i2 <= 1023 } +; FLAT-NEXT: Zone { +; FLAT-NEXT: Lifetime: { [MemRef_1[i0, i1] -> [3148801 + 1049600i0 + 1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and i1 >= 0 and -1024i0 <= i1 <= 1048574 - 1024i0 and i1 <= 1023; [MemRef_1[1023, 1023] -> [1077938176{{\]\]}} -> [Stmt_for_body3_i44[1023, 1023] -> Val_double__mul_i43[{{\]\]}}; [MemRef_1[i0, 
i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148802 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : exists (e2, e3: 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 3148802 + 1049600i0 + 1025i1 and 3148802 <= i2 <= 1077938176 and i2 <= 4197376 + 1049600i0 and i2 <= 3149825 + 1049600i0 + 1025i1 and 1025*floor((-3148802 - 1049600i0 - 1025i1 + i2)/1025) >= -3149825 - 1049600i0 - 1025i1 + i2 and 1025*floor((-3148801 - 1049600i0 - 1025i1 + i2)/1025) <= -3148802 - 1049600i0 - 1025i1 + i2 and 0 <= e2 <= 1023 and 1049600e2 >= -4197376 + i2 and 0 <= e3 <= 1023 and -3149824 + i2 - 1049600e2 <= 1025e3 <= -3148801 + i2 - 1049600e2); [MemRef_1[i0, 1023] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, 1023, -4197377 - 1049600i0 + i2] -> Val_double__add_i[{{\]\]}} : i0 >= 0 and 4197377 + 1049600i0 <= i2 <= 1077938176 and i2 <= 4198400 + 1049600i0 and 1025*floor((-5245951 - 1049600i0 + i2)/1025) <= -5245952 - 1049600i0 + i2 and 1049600*floor((-1049600i0 + i2)/1049600) <= -1048577 - 1049600i0 + i2 and 1025*floor((-4197377 - 1049600i0 + i2)/1025) >= -4198400 - 1049600i0 + i2; [MemRef_1[1023, 1023] -> [i2{{\]\]}} -> [Stmt_for_body8_i[1023, 1023, -1077938177 + i2] -> Val_double__add_i[{{\]\]}} : 1077938177 <= i2 <= 1077939199 and 1025*floor((-1077938177 + i2)/1025) >= -1077939200 + i2 } +; FLAT-NEXT: Written: { [MemRef_1[i0, i1] -> [3148800 + 1049600i0 + 1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1048574 - 1024i0 and i1 <= 1023; [MemRef_1[1023, 1023] -> [1077938175{{\]\]}} -> [Stmt_for_body3_i44[1023, 1023] -> Val_double__mul_i43[{{\]\]}}; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148801 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 3148801 + 1049600i0 + 1025i1 <= i2 <= 4197375 + 1049600i0 and i2 <= 3149824 + 1049600i0 + 1025i1; [MemRef_1[i0, 1023] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, 1023, -4197376 - 1049600i0 + i2] -> 
Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 4197376 + 1049600i0 <= i2 <= 1077938175 and i2 <= 4198399 + 1049600i0; [MemRef_1[1023, 1023] -> [i2{{\]\]}} -> [Stmt_for_body8_i[1023, 1023, -1077938176 + i2] -> Val_double__add_i[{{\]\]}} : 1077938176 <= i2 <= 1077939199 } +; FLAT-NEXT: } +; FLAT-NEXT: After Zone { +; FLAT-NEXT: Lifetime: { [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 2 + 1025i0 + i1 <= i2 <= 3148800 + 1049600i0 + 1025i1; [MemRef_5[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 1049602 + 1025i0 + i1; [MemRef_9[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 2099201 + 1025i0 + i1; [MemRef_1[i0, i1] -> [3148801 + 1049600i0 + 1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, 1023] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 3149826 + 1049600i0 + 1025i1; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148802 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 3148802 + 1049600i0 + 1025i1 <= i2 <= 3149825 + 1049600i0 + 1025i1; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 1 + 1025i0 + i1; [MemRef_9[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 2099202 + 1025i0 + i1; [MemRef_5[i0, i1] -> [i2{{\]\]}} -> Undef[] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 <= 1049601 + 1025i0 + i1 } +; FLAT-NEXT: Written: { [MemRef_1[i0, i1] -> [1 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body3_i[i0, i1] -> Val_double__div_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [3148800 + 1049600i0 + 
1025i1{{\]\]}} -> [Stmt_for_body3_i44[i0, i1] -> Val_double__mul_i43[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_5[i0, i1] -> [1049601 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body18_i[i0, i1] -> Val_double__div23_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_9[i0, i1] -> [2099201 + 1025i0 + i1{{\]\]}} -> [Stmt_for_body41_i[i0, i1] -> Val_double__div46_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023; [MemRef_1[i0, i1] -> [i2{{\]\]}} -> [Stmt_for_body8_i[i0, i1, -3148801 - 1049600i0 - 1025i1 + i2] -> Val_double__add_i[{{\]\]}} : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 3148801 + 1049600i0 + 1025i1 <= i2 <= 3149824 + 1049600i0 + 1025i1 } +; FLAT-NEXT: } +; FLAT-NEXT: After Statements { +; FLAT-NEXT: Stmt_for_cond1_preheader_i +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_cond1_preheader_i[i0] -> MemRef_conv_i[] }; +; FLAT-NEXT: Stmt_for_body3_i +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body3_i[i0, i1] -> MemRef_conv_i[] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body3_i[i0, i1] -> MemRef_1[i0, i1] }; +; FLAT-NEXT: Stmt_for_cond15_preheader_i +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_cond15_preheader_i[i0] -> MemRef_conv19_i[] }; +; FLAT-NEXT: Stmt_for_body18_i +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body18_i[i0, i1] -> MemRef_conv19_i[] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body18_i[i0, i1] -> MemRef_5[i0, i1] }; +; FLAT-NEXT: Stmt_for_cond38_preheader_i +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_cond38_preheader_i[i0] -> MemRef_conv42_i[] }; +; FLAT-NEXT: Stmt_for_body41_i +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body41_i[i0, i1] -> MemRef_conv42_i[] }; +; 
FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body41_i[i0, i1] -> MemRef_9[i0, i1] }; +; FLAT-NEXT: Stmt_for_body3_i44 +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body3_i44[i0, i1] -> MemRef_1[i0, i1] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body3_i44[i0, i1] -> MemRef_1[i0, i1] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body3_i44[i0, i1] -> MemRef_19__phi[] }; +; FLAT-NEXT: new: { Stmt_for_body3_i44[i0, i1] -> MemRef_1[i0, i1] : i0 >= 0 and 0 <= i1 <= 1048574 - 1024i0 and i1 <= 1023; Stmt_for_body3_i44[1023, 1023] -> MemRef_1[1023, 1023] }; +; FLAT-NEXT: Stmt_for_body8_i +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_19__phi[] }; +; FLAT-NEXT: new: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and i1 >= 0 and 0 <= i2 <= 1048574 - 1025i1 and i2 <= 1023; Stmt_for_body8_i[i0, 1023, i2] -> MemRef_1[i0, 1023] : i0 >= 0 and 0 <= i2 <= 1073740799 - 1049600i0 and i2 <= 1023; Stmt_for_body8_i[1023, 1023, i2] -> MemRef_1[1023, 1023] : 0 <= i2 <= 1023 }; +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_19__phi[] }; +; FLAT-NEXT: new: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and i2 >= 0 and -1 - 1049600i0 - 1025i1 <= i2 <= 1074790398 - 1049600i0 - 1025i1 and i2 <= 1023 }; +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_5[i0, i2] }; +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_9[i2, i1] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: { Stmt_for_body8_i[i0, i1, i2] -> MemRef_1[i0, i1] }; +; 
FLAT-NEXT: } +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond1.preheader.i => kernel_gemm.exit' in function 'main': +; FLAT-NEXT: :: isl ast :: main :: %for.cond1.preheader.i---%kernel_gemm.exit +; +; FLAT: if (1 && (&MemRef_9[1023][1024] <= &MemRef_5[0][0] || &MemRef_5[1023][1024] <= &MemRef_9[0][0]) && (&MemRef_1[1023][1024] <= &MemRef_5[0][0] || &MemRef_5[1023][1024] <= &MemRef_1[0][0]) && (&MemRef_1[1023][1024] <= &MemRef_9[0][0] || &MemRef_9[1023][1024] <= &MemRef_1[0][0])) +; +; FLAT: { +; FLAT-NEXT: for (int c0 = 0; c0 <= 1049599; c0 += 1) { +; FLAT-NEXT: if (c0 % 1025 == 0) { +; FLAT-NEXT: Stmt_for_cond1_preheader_i(c0 / 1025); +; FLAT-NEXT: } else { +; FLAT-NEXT: Stmt_for_body3_i((c0 - 1) / 1025, (c0 + 1024) % 1025); +; FLAT-NEXT: } +; FLAT-NEXT: } +; FLAT-NEXT: for (int c0 = 1049600; c0 <= 2099199; c0 += 1) { +; FLAT-NEXT: if (c0 % 1025 == 0) { +; FLAT-NEXT: Stmt_for_cond15_preheader_i((c0 / 1025) - 1024); +; FLAT-NEXT: } else { +; FLAT-NEXT: Stmt_for_body18_i((c0 - 1) / 1025 - 1024, (c0 - 1) % 1025); +; FLAT-NEXT: } +; FLAT-NEXT: } +; FLAT-NEXT: for (int c0 = 2099200; c0 <= 1077939199; c0 += 1) { +; FLAT-NEXT: if ((c0 >= 3148801 && c0 <= 3149824) || (c0 >= 3149825 && (1024 * c0 + 1024) % 1025 <= 1023)) { +; FLAT-NEXT: Stmt_for_body8_i(c0 - (1049599 * c0 + 1049599) / 1049600 - 3, c0 <= 3149824 || ((1049599 * c0 + 1049599) % 1049600 <= 1049598 && (1049599 * c0 + 1049599) % 1049600 >= 1048576) ? 
0 : -1023 * c0 - (1024 * c0 + 1024) / 1025 + 1024 * ((1049599 * c0 + 1049599) / 1049600), -((1024 * c0 + 1024) % 1025) + 1023); +; FLAT-NEXT: } else if (c0 <= 3148799 && c0 % 1025 >= 1) { +; FLAT-NEXT: Stmt_for_body41_i((c0 - 1) / 1025 - 2048, (c0 - 1) % 1025); +; FLAT-NEXT: } else if (c0 <= 3147775 && c0 % 1025 == 0) { +; FLAT-NEXT: Stmt_for_cond38_preheader_i((c0 / 1025) - 2048); +; FLAT-NEXT: } else { +; FLAT-NEXT: Stmt_for_body3_i44(c0 / 1049600 - 3, (c0 / 1025) - 1024 * (c0 / 1049600)); +; FLAT-NEXT: } +; FLAT-NEXT: } +; FLAT-NEXT: } +; +; FLAT: else +; FLAT-NEXT: { /* original code */ } +; +; FLAT: Printing analysis 'Polly - Flatten schedule' for region: 'for.body6.i => for.end.i' in function 'main': +; FLAT-NEXT: Schedule before flattening { +; FLAT-NEXT: { Stmt_for_body6_i[i0] -> [i0] } +; FLAT-NEXT: } +; +; FLAT: Schedule after flattening { +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> [i0] } +; FLAT-NEXT: } +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.body6.i => for.end.i' in function 'main': +; FLAT-NEXT: Original Zone { +; FLAT-NEXT: Lifetime: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -15 + i0 <= 16o0 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -1 + i0 <= 16o0 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -3 + i0 <= 16o0 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -7 + i0 <= 16o0 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -5 + i0 <= 16o0 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -13 + i0 
<= 16o0 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -9 + i0 <= 16o0 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -11 + i0 <= 16o0 <= -10 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> Undef[] : -14 <= i0 <= 16383 and 16i1 <= 14 + i0 and ((16*floor((2 + i0)/16) = 2 + i0 and 14 <= i0 <= 16382 and 16i1 <= -14 + i0) or (-2 <= i0 <= 16381 and 16i1 <= 2 + i0 and 16*floor((2 + i0)/16) <= -12 + i0) or (-5 <= i0 <= 16378 and 16i1 <= 5 + i0 and 16*floor((5 + i0)/16) <= -9 + i0) or (-8 <= i0 <= 16375 and 16i1 <= 8 + i0 and 16*floor((8 + i0)/16) <= -6 + i0) or (-11 <= i0 <= 16372 and 16i1 <= 11 + i0 and 16*floor((11 + i0)/16) <= -3 + i0) or (i0 <= 16369 and 16*floor((14 + i0)/16) <= i0) or (16*floor((14 + i0)/16) = 14 + i0 and 2 <= i0 <= 16370 and 16i1 <= -2 + i0) or (16*floor((11 + i0)/16) = 11 + i0 and 5 <= i0 <= 16373 and 16i1 <= -5 + i0) or (16*floor((8 + i0)/16) = 8 + i0 and 8 <= i0 <= 16376 and 16i1 <= -8 + i0) or (16*floor((5 + i0)/16) = 5 + i0 and 11 <= i0 <= 16379 and 16i1 <= -11 + i0) or (16*floor((-15 + i0)/16) = -15 + i0 and i0 >= 15 and 16i1 <= -15 + i0)) } +; FLAT-NEXT: Written: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -15 + i0 <= 16i1 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -7 + i0 <= 16i1 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -1 + i0 <= 16i1 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -5 + i0 <= 16i1 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -9 + i0 <= 16i1 <= -8 + i0; [MemRef_call_i[i0] 
-> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -13 + i0 <= 16i1 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -3 + i0 <= 16i1 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -11 + i0 <= 16i1 <= -10 + i0 } +; FLAT-NEXT: } +; FLAT-NEXT: After Zone { +; FLAT-NEXT: Lifetime: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -15 + i0 <= 16o0 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -1 + i0 <= 16o0 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -3 + i0 <= 16o0 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -7 + i0 <= 16o0 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -5 + i0 <= 16o0 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -13 + i0 <= 16o0 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -9 + i0 <= 16o0 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[o0] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= o0 <= 1023 and o0 < i1 and -11 + i0 <= 16o0 <= -10 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> Undef[] : -14 <= i0 <= 16383 and 16i1 <= 14 + i0 and ((16*floor((2 + i0)/16) = 2 + i0 and 14 <= i0 <= 16382 and 16i1 <= -14 + i0) or (-2 <= i0 <= 16381 and 16i1 <= 2 + i0 and 16*floor((2 + i0)/16) <= -12 + i0) or (-5 <= i0 <= 16378 and 16i1 <= 5 + i0 and 16*floor((5 + 
i0)/16) <= -9 + i0) or (-8 <= i0 <= 16375 and 16i1 <= 8 + i0 and 16*floor((8 + i0)/16) <= -6 + i0) or (-11 <= i0 <= 16372 and 16i1 <= 11 + i0 and 16*floor((11 + i0)/16) <= -3 + i0) or (i0 <= 16369 and 16*floor((14 + i0)/16) <= i0) or (16*floor((14 + i0)/16) = 14 + i0 and 2 <= i0 <= 16370 and 16i1 <= -2 + i0) or (16*floor((11 + i0)/16) = 11 + i0 and 5 <= i0 <= 16373 and 16i1 <= -5 + i0) or (16*floor((8 + i0)/16) = 8 + i0 and 8 <= i0 <= 16376 and 16i1 <= -8 + i0) or (16*floor((5 + i0)/16) = 5 + i0 and 11 <= i0 <= 16379 and 16i1 <= -11 + i0) or (16*floor((-15 + i0)/16) = -15 + i0 and i0 >= 15 and 16i1 <= -15 + i0)) } +; FLAT-NEXT: Written: [p_0] -> { [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add123_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -15 + i0 <= 16i1 <= -14 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__conv52_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -7 + i0 <= 16i1 <= -6 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -1 + i0 <= 16i1 <= i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add33_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -5 + i0 <= 16i1 <= -4 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add69_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -9 + i0 <= 16i1 <= -8 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add105_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -13 + i0 <= 16i1 <= -12 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add15_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -3 + i0 <= 16i1 <= -2 + i0; [MemRef_call_i[i0] -> [i1{{\]\]}} -> [Stmt_for_body6_i[i1] -> Val_i8__add87_i_i[{{\]\]}} : 0 <= i1 <= 1023 and -11 + i0 <= 16i1 <= -10 + i0 } +; FLAT-NEXT: } +; FLAT-NEXT: After Statements { +; FLAT-NEXT: Stmt_for_body6_i +; FLAT-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_1[p_0, i0] }; +; FLAT-NEXT: 
MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[1 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[2 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[3 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[4 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[5 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[6 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[7 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[8 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[9 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[10 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[11 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[12 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[13 + 16i0] 
}; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[14 + 16i0] }; +; FLAT-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; FLAT-NEXT: [p_0] -> { Stmt_for_body6_i[i0] -> MemRef_call_i[15 + 16i0] }; +; FLAT-NEXT: } +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.body6.i => for.end.i' in function 'main': +; FLAT-NEXT: :: isl ast :: main :: %for.body6.i---%for.end.i +; +; FLAT: if (1) +; +; FLAT: for (int c0 = 0; c0 <= 1023; c0 += 1) +; FLAT-NEXT: Stmt_for_body6_i(c0); +; +; FLAT: else +; FLAT-NEXT: { /* original code */ } +; +; FLAT: Printing analysis 'Polly - Flatten schedule' for region: 'for.cond3.preheader.i => print_array.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'for.cond3.preheader.i => print_array.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'for.cond3.preheader.i => print_array.exit' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Flatten schedule' for region: 'entry => ' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - DeLICM/DePRE' for region: 'entry => ' in function 'main': +; FLAT-NEXT: Printing analysis 'Polly - Generate an AST from the SCoP (isl)' for region: 'entry => ' in function 'main': + +; PREIR: ; ModuleID = '' +; PREIR-NEXT: source_filename = "/mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/gemm/gemm.c" +; PREIR-NEXT: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +; PREIR-NEXT: target triple = "x86_64-unknown-linux-gnu" +; +; PREIR: %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +; PREIR-NEXT: %struct._IO_marker = type { 
%struct._IO_marker*, %struct._IO_FILE*, i32 } +; +; PREIR: @polybench_papi_counters_threadid = local_unnamed_addr global i32 0, align 4 +; PREIR-NEXT: @polybench_program_total_flops = local_unnamed_addr global double 0.000000e+00, align 8 +; PREIR-NEXT: @polybench_t_start = common local_unnamed_addr global double 0.000000e+00, align 8 +; PREIR-NEXT: @polybench_t_end = common local_unnamed_addr global double 0.000000e+00, align 8 +; PREIR-NEXT: @.str = private unnamed_addr constant [7 x i8] c"%0.6f\0A\00", align 1 +; PREIR-NEXT: @polybench_c_start = common local_unnamed_addr global i64 0, align 8 +; PREIR-NEXT: @polybench_c_end = common local_unnamed_addr global i64 0, align 8 +; PREIR-NEXT: @stderr = external local_unnamed_addr global %struct._IO_FILE*, align 8 +; PREIR-NEXT: @.str.1 = private unnamed_addr constant [51 x i8] c"[PolyBench] posix_memalign: cannot allocate memory\00", align 1 +; +; PREIR: ; Function Attrs: norecurse nounwind readnone uwtable +; PREIR-NEXT: define void @polybench_flush_cache() local_unnamed_addr #0 { +; PREIR-NEXT: entry: +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: ret void +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: argmemonly nounwind +; PREIR-NEXT: declare void @llvm.lifetime.start(i64, i8* nocapture) #1 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare noalias i8* @calloc(i64, i64) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare void @free(i8* nocapture) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: argmemonly nounwind +; PREIR-NEXT: declare void @llvm.lifetime.end(i64, i8* nocapture) #1 +; +; PREIR: ; Function Attrs: norecurse nounwind readnone uwtable +; PREIR-NEXT: define void @polybench_prepare_instruments() local_unnamed_addr #0 { +; PREIR-NEXT: entry: +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: ret void +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: norecurse 
nounwind uwtable +; PREIR-NEXT: define void @polybench_timer_start() local_unnamed_addr #3 { +; PREIR-NEXT: entry: +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: store double 0.000000e+00, double* @polybench_t_start, align 8, !tbaa !1 +; PREIR-NEXT: ret void +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: norecurse nounwind uwtable +; PREIR-NEXT: define void @polybench_timer_stop() local_unnamed_addr #3 { +; PREIR-NEXT: entry: +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: store double 0.000000e+00, double* @polybench_t_end, align 8, !tbaa !1 +; PREIR-NEXT: ret void +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: nounwind uwtable +; PREIR-NEXT: define void @polybench_timer_print() local_unnamed_addr #4 { +; PREIR-NEXT: entry: +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: %0 = load double, double* @polybench_t_end, align 8, !tbaa !1 +; PREIR-NEXT: %1 = load double, double* @polybench_t_start, align 8, !tbaa !1 +; PREIR-NEXT: %sub = fsub double %0, %1 +; PREIR-NEXT: %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), double %sub) +; PREIR-NEXT: ret void +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: nounwind uwtable +; PREIR-NEXT: define i8* @polybench_alloc_data(i64 %n, i32 %elt_size) local_unnamed_addr #4 { +; PREIR-NEXT: entry: +; PREIR-NEXT: %new.i = alloca i8*, align 8 +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; PREIR-NEXT: %conv = sext i32 %elt_size to i64 +; PREIR-NEXT: %mul = mul i64 %conv, %n +; PREIR-NEXT: %0 = bitcast i8** %new.i to i8* +; PREIR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) #7 +; PREIR-NEXT: store i8* null, i8** %new.i, align 8, !tbaa !5 +; PREIR-NEXT: %call.i = call i32 @posix_memalign(i8** nonnull %new.i, i64 32, i64 %mul) #7 +; PREIR-NEXT: %1 = load i8*, i8** %new.i, align 8, !tbaa !5 +; PREIR-NEXT: %tobool.i = icmp eq i8* %1, null +; PREIR-NEXT: %tobool1.i = icmp ne i32 %call.i, 0 +; PREIR-NEXT: %or.cond.i = or i1 %tobool1.i, %tobool.i +; PREIR-NEXT: br i1 %or.cond.i, label %if.then.i, label %xmalloc.exit +; +; PREIR: if.then.i: ; preds = %entry.split +; PREIR-NEXT: %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; PREIR-NEXT: %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #9 +; PREIR-NEXT: call void @exit(i32 1) #10 +; PREIR-NEXT: unreachable +; +; PREIR: xmalloc.exit: ; preds = %entry.split +; PREIR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %0) #7 +; PREIR-NEXT: ret i8* %1 +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: nounwind uwtable +; PREIR-NEXT: define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #5 { +; PREIR-NEXT: entry: +; PREIR-NEXT: %conv42.i.s2a = alloca double +; PREIR-NEXT: %conv19.i.s2a = alloca double +; PREIR-NEXT: %conv.i.s2a = alloca double +; PREIR-NEXT: %new.i.i34 = alloca i8*, align 8 +; PREIR-NEXT: %new.i.i27 = alloca i8*, align 8 +; PREIR-NEXT: %new.i.i = alloca i8*, align 8 +; PREIR-NEXT: br label %entry.split +; +; PREIR: entry.split: ; preds = %entry +; 
PREIR-NEXT: %0 = bitcast i8** %new.i.i to i8* +; PREIR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) #7 +; PREIR-NEXT: store i8* null, i8** %new.i.i, align 8, !tbaa !5 +; PREIR-NEXT: %call.i.i = call i32 @posix_memalign(i8** nonnull %new.i.i, i64 32, i64 8388608) #7 +; PREIR-NEXT: %1 = load i8*, i8** %new.i.i, align 8, !tbaa !5 +; PREIR-NEXT: %tobool.i.i = icmp eq i8* %1, null +; PREIR-NEXT: %tobool1.i.i = icmp ne i32 %call.i.i, 0 +; PREIR-NEXT: %or.cond.i.i = or i1 %tobool1.i.i, %tobool.i.i +; PREIR-NEXT: br i1 %or.cond.i.i, label %if.then.i.i, label %polybench_alloc_data.exit +; +; PREIR: if.then.i.i: ; preds = %entry.split +; PREIR-NEXT: %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; PREIR-NEXT: %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #9 +; PREIR-NEXT: call void @exit(i32 1) #10 +; PREIR-NEXT: unreachable +; +; PREIR: polybench_alloc_data.exit: ; preds = %entry.split +; PREIR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %0) #7 +; PREIR-NEXT: %4 = bitcast i8** %new.i.i27 to i8* +; PREIR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %4) #7 +; PREIR-NEXT: store i8* null, i8** %new.i.i27, align 8, !tbaa !5 +; PREIR-NEXT: %call.i.i28 = call i32 @posix_memalign(i8** nonnull %new.i.i27, i64 32, i64 8388608) #7 +; PREIR-NEXT: %5 = load i8*, i8** %new.i.i27, align 8, !tbaa !5 +; PREIR-NEXT: %tobool.i.i29 = icmp eq i8* %5, null +; PREIR-NEXT: %tobool1.i.i30 = icmp ne i32 %call.i.i28, 0 +; PREIR-NEXT: %or.cond.i.i31 = or i1 %tobool1.i.i30, %tobool.i.i29 +; PREIR-NEXT: br i1 %or.cond.i.i31, label %if.then.i.i32, label %polybench_alloc_data.exit33 +; +; PREIR: if.then.i.i32: ; preds = %polybench_alloc_data.exit +; PREIR-NEXT: %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; PREIR-NEXT: %7 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %6) 
#9 +; PREIR-NEXT: call void @exit(i32 1) #10 +; PREIR-NEXT: unreachable +; +; PREIR: polybench_alloc_data.exit33: ; preds = %polybench_alloc_data.exit +; PREIR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %4) #7 +; PREIR-NEXT: %8 = bitcast i8** %new.i.i34 to i8* +; PREIR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %8) #7 +; PREIR-NEXT: store i8* null, i8** %new.i.i34, align 8, !tbaa !5 +; PREIR-NEXT: %call.i.i35 = call i32 @posix_memalign(i8** nonnull %new.i.i34, i64 32, i64 8388608) #7 +; PREIR-NEXT: %9 = load i8*, i8** %new.i.i34, align 8, !tbaa !5 +; PREIR-NEXT: %tobool.i.i36 = icmp eq i8* %9, null +; PREIR-NEXT: %tobool1.i.i37 = icmp ne i32 %call.i.i35, 0 +; PREIR-NEXT: %or.cond.i.i38 = or i1 %tobool1.i.i37, %tobool.i.i36 +; PREIR-NEXT: br i1 %or.cond.i.i38, label %if.then.i.i39, label %polybench_alloc_data.exit40 +; +; PREIR: if.then.i.i39: ; preds = %polybench_alloc_data.exit33 +; PREIR-NEXT: %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; PREIR-NEXT: %11 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %10) #9 +; PREIR-NEXT: call void @exit(i32 1) #10 +; PREIR-NEXT: unreachable +; +; PREIR: polybench_alloc_data.exit40: ; preds = %polybench_alloc_data.exit33 +; PREIR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %8) #7 +; PREIR-NEXT: %arraydecay = bitcast i8* %1 to [1024 x double]* +; PREIR-NEXT: %arraydecay3 = bitcast i8* %5 to [1024 x double]* +; PREIR-NEXT: br label %polly.split_new_and_old +; +; PREIR: polly.split_new_and_old: ; preds = %polybench_alloc_data.exit40 +; PREIR-NEXT: %polly.access.cast. = bitcast i8* %9 to double* +; PREIR-NEXT: %polly.access.mul. 
= call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit = extractvalue { i64, i1 } %polly.access.mul., 1 +; PREIR-NEXT: %polly.overflow.state = or i1 false, %polly.access.mul..obit +; PREIR-NEXT: %polly.access.mul..res = extractvalue { i64, i1 } %polly.access.mul., 0 +; PREIR-NEXT: %polly.access.add. = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res, i64 1024) +; PREIR-NEXT: %polly.access.add..obit = extractvalue { i64, i1 } %polly.access.add., 1 +; PREIR-NEXT: %polly.overflow.state1 = or i1 %polly.overflow.state, %polly.access.add..obit +; PREIR-NEXT: %polly.access.add..res = extractvalue { i64, i1 } %polly.access.add., 0 +; PREIR-NEXT: %polly.access. = getelementptr double, double* %polly.access.cast., i64 %polly.access.add..res +; PREIR-NEXT: %polly.access.cast.2 = bitcast i8* %5 to double* +; PREIR-NEXT: %polly.access.mul.3 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit4 = extractvalue { i64, i1 } %polly.access.mul.3, 1 +; PREIR-NEXT: %polly.overflow.state5 = or i1 %polly.overflow.state1, %polly.access.mul..obit4 +; PREIR-NEXT: %polly.access.mul..res6 = extractvalue { i64, i1 } %polly.access.mul.3, 0 +; PREIR-NEXT: %polly.access.add.7 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res6, i64 0) +; PREIR-NEXT: %polly.access.add..obit8 = extractvalue { i64, i1 } %polly.access.add.7, 1 +; PREIR-NEXT: %polly.overflow.state9 = or i1 %polly.overflow.state5, %polly.access.add..obit8 +; PREIR-NEXT: %polly.access.add..res10 = extractvalue { i64, i1 } %polly.access.add.7, 0 +; PREIR-NEXT: %polly.access.11 = getelementptr double, double* %polly.access.cast.2, i64 %polly.access.add..res10 +; PREIR-NEXT: %12 = ptrtoint double* %polly.access. 
to i64 +; PREIR-NEXT: %13 = ptrtoint double* %polly.access.11 to i64 +; PREIR-NEXT: %14 = icmp ule i64 %12, %13 +; PREIR-NEXT: %polly.access.cast.12 = bitcast i8* %5 to double* +; PREIR-NEXT: %polly.access.mul.13 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit14 = extractvalue { i64, i1 } %polly.access.mul.13, 1 +; PREIR-NEXT: %polly.overflow.state15 = or i1 %polly.overflow.state9, %polly.access.mul..obit14 +; PREIR-NEXT: %polly.access.mul..res16 = extractvalue { i64, i1 } %polly.access.mul.13, 0 +; PREIR-NEXT: %polly.access.add.17 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res16, i64 1024) +; PREIR-NEXT: %polly.access.add..obit18 = extractvalue { i64, i1 } %polly.access.add.17, 1 +; PREIR-NEXT: %polly.overflow.state19 = or i1 %polly.overflow.state15, %polly.access.add..obit18 +; PREIR-NEXT: %polly.access.add..res20 = extractvalue { i64, i1 } %polly.access.add.17, 0 +; PREIR-NEXT: %polly.access.21 = getelementptr double, double* %polly.access.cast.12, i64 %polly.access.add..res20 +; PREIR-NEXT: %polly.access.cast.22 = bitcast i8* %9 to double* +; PREIR-NEXT: %polly.access.mul.23 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit24 = extractvalue { i64, i1 } %polly.access.mul.23, 1 +; PREIR-NEXT: %polly.overflow.state25 = or i1 %polly.overflow.state19, %polly.access.mul..obit24 +; PREIR-NEXT: %polly.access.mul..res26 = extractvalue { i64, i1 } %polly.access.mul.23, 0 +; PREIR-NEXT: %polly.access.add.27 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res26, i64 0) +; PREIR-NEXT: %polly.access.add..obit28 = extractvalue { i64, i1 } %polly.access.add.27, 1 +; PREIR-NEXT: %polly.overflow.state29 = or i1 %polly.overflow.state25, %polly.access.add..obit28 +; PREIR-NEXT: %polly.access.add..res30 = extractvalue { i64, i1 } %polly.access.add.27, 0 +; PREIR-NEXT: %polly.access.31 = getelementptr double, double* 
%polly.access.cast.22, i64 %polly.access.add..res30 +; PREIR-NEXT: %15 = ptrtoint double* %polly.access.21 to i64 +; PREIR-NEXT: %16 = ptrtoint double* %polly.access.31 to i64 +; PREIR-NEXT: %17 = icmp ule i64 %15, %16 +; PREIR-NEXT: %18 = or i1 %14, %17 +; PREIR-NEXT: %19 = and i1 true, %18 +; PREIR-NEXT: %polly.access.cast.32 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.33 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit34 = extractvalue { i64, i1 } %polly.access.mul.33, 1 +; PREIR-NEXT: %polly.overflow.state35 = or i1 %polly.overflow.state29, %polly.access.mul..obit34 +; PREIR-NEXT: %polly.access.mul..res36 = extractvalue { i64, i1 } %polly.access.mul.33, 0 +; PREIR-NEXT: %polly.access.add.37 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res36, i64 1024) +; PREIR-NEXT: %polly.access.add..obit38 = extractvalue { i64, i1 } %polly.access.add.37, 1 +; PREIR-NEXT: %polly.overflow.state39 = or i1 %polly.overflow.state35, %polly.access.add..obit38 +; PREIR-NEXT: %polly.access.add..res40 = extractvalue { i64, i1 } %polly.access.add.37, 0 +; PREIR-NEXT: %polly.access.41 = getelementptr double, double* %polly.access.cast.32, i64 %polly.access.add..res40 +; PREIR-NEXT: %polly.access.cast.42 = bitcast i8* %5 to double* +; PREIR-NEXT: %polly.access.mul.43 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit44 = extractvalue { i64, i1 } %polly.access.mul.43, 1 +; PREIR-NEXT: %polly.overflow.state45 = or i1 %polly.overflow.state39, %polly.access.mul..obit44 +; PREIR-NEXT: %polly.access.mul..res46 = extractvalue { i64, i1 } %polly.access.mul.43, 0 +; PREIR-NEXT: %polly.access.add.47 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res46, i64 0) +; PREIR-NEXT: %polly.access.add..obit48 = extractvalue { i64, i1 } %polly.access.add.47, 1 +; PREIR-NEXT: %polly.overflow.state49 = or i1 %polly.overflow.state45, 
%polly.access.add..obit48 +; PREIR-NEXT: %polly.access.add..res50 = extractvalue { i64, i1 } %polly.access.add.47, 0 +; PREIR-NEXT: %polly.access.51 = getelementptr double, double* %polly.access.cast.42, i64 %polly.access.add..res50 +; PREIR-NEXT: %20 = ptrtoint double* %polly.access.41 to i64 +; PREIR-NEXT: %21 = ptrtoint double* %polly.access.51 to i64 +; PREIR-NEXT: %22 = icmp ule i64 %20, %21 +; PREIR-NEXT: %polly.access.cast.52 = bitcast i8* %5 to double* +; PREIR-NEXT: %polly.access.mul.53 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit54 = extractvalue { i64, i1 } %polly.access.mul.53, 1 +; PREIR-NEXT: %polly.overflow.state55 = or i1 %polly.overflow.state49, %polly.access.mul..obit54 +; PREIR-NEXT: %polly.access.mul..res56 = extractvalue { i64, i1 } %polly.access.mul.53, 0 +; PREIR-NEXT: %polly.access.add.57 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res56, i64 1024) +; PREIR-NEXT: %polly.access.add..obit58 = extractvalue { i64, i1 } %polly.access.add.57, 1 +; PREIR-NEXT: %polly.overflow.state59 = or i1 %polly.overflow.state55, %polly.access.add..obit58 +; PREIR-NEXT: %polly.access.add..res60 = extractvalue { i64, i1 } %polly.access.add.57, 0 +; PREIR-NEXT: %polly.access.61 = getelementptr double, double* %polly.access.cast.52, i64 %polly.access.add..res60 +; PREIR-NEXT: %polly.access.cast.62 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.63 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit64 = extractvalue { i64, i1 } %polly.access.mul.63, 1 +; PREIR-NEXT: %polly.overflow.state65 = or i1 %polly.overflow.state59, %polly.access.mul..obit64 +; PREIR-NEXT: %polly.access.mul..res66 = extractvalue { i64, i1 } %polly.access.mul.63, 0 +; PREIR-NEXT: %polly.access.add.67 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res66, i64 0) +; PREIR-NEXT: %polly.access.add..obit68 = extractvalue { 
i64, i1 } %polly.access.add.67, 1 +; PREIR-NEXT: %polly.overflow.state69 = or i1 %polly.overflow.state65, %polly.access.add..obit68 +; PREIR-NEXT: %polly.access.add..res70 = extractvalue { i64, i1 } %polly.access.add.67, 0 +; PREIR-NEXT: %polly.access.71 = getelementptr double, double* %polly.access.cast.62, i64 %polly.access.add..res70 +; PREIR-NEXT: %23 = ptrtoint double* %polly.access.61 to i64 +; PREIR-NEXT: %24 = ptrtoint double* %polly.access.71 to i64 +; PREIR-NEXT: %25 = icmp ule i64 %23, %24 +; PREIR-NEXT: %26 = or i1 %22, %25 +; PREIR-NEXT: %27 = and i1 %19, %26 +; PREIR-NEXT: %polly.access.cast.72 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.73 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit74 = extractvalue { i64, i1 } %polly.access.mul.73, 1 +; PREIR-NEXT: %polly.overflow.state75 = or i1 %polly.overflow.state69, %polly.access.mul..obit74 +; PREIR-NEXT: %polly.access.mul..res76 = extractvalue { i64, i1 } %polly.access.mul.73, 0 +; PREIR-NEXT: %polly.access.add.77 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res76, i64 1024) +; PREIR-NEXT: %polly.access.add..obit78 = extractvalue { i64, i1 } %polly.access.add.77, 1 +; PREIR-NEXT: %polly.overflow.state79 = or i1 %polly.overflow.state75, %polly.access.add..obit78 +; PREIR-NEXT: %polly.access.add..res80 = extractvalue { i64, i1 } %polly.access.add.77, 0 +; PREIR-NEXT: %polly.access.81 = getelementptr double, double* %polly.access.cast.72, i64 %polly.access.add..res80 +; PREIR-NEXT: %polly.access.cast.82 = bitcast i8* %9 to double* +; PREIR-NEXT: %polly.access.mul.83 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit84 = extractvalue { i64, i1 } %polly.access.mul.83, 1 +; PREIR-NEXT: %polly.overflow.state85 = or i1 %polly.overflow.state79, %polly.access.mul..obit84 +; PREIR-NEXT: %polly.access.mul..res86 = extractvalue { i64, i1 } %polly.access.mul.83, 0 +; 
PREIR-NEXT: %polly.access.add.87 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res86, i64 0) +; PREIR-NEXT: %polly.access.add..obit88 = extractvalue { i64, i1 } %polly.access.add.87, 1 +; PREIR-NEXT: %polly.overflow.state89 = or i1 %polly.overflow.state85, %polly.access.add..obit88 +; PREIR-NEXT: %polly.access.add..res90 = extractvalue { i64, i1 } %polly.access.add.87, 0 +; PREIR-NEXT: %polly.access.91 = getelementptr double, double* %polly.access.cast.82, i64 %polly.access.add..res90 +; PREIR-NEXT: %28 = ptrtoint double* %polly.access.81 to i64 +; PREIR-NEXT: %29 = ptrtoint double* %polly.access.91 to i64 +; PREIR-NEXT: %30 = icmp ule i64 %28, %29 +; PREIR-NEXT: %polly.access.cast.92 = bitcast i8* %9 to double* +; PREIR-NEXT: %polly.access.mul.93 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit94 = extractvalue { i64, i1 } %polly.access.mul.93, 1 +; PREIR-NEXT: %polly.overflow.state95 = or i1 %polly.overflow.state89, %polly.access.mul..obit94 +; PREIR-NEXT: %polly.access.mul..res96 = extractvalue { i64, i1 } %polly.access.mul.93, 0 +; PREIR-NEXT: %polly.access.add.97 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res96, i64 1024) +; PREIR-NEXT: %polly.access.add..obit98 = extractvalue { i64, i1 } %polly.access.add.97, 1 +; PREIR-NEXT: %polly.overflow.state99 = or i1 %polly.overflow.state95, %polly.access.add..obit98 +; PREIR-NEXT: %polly.access.add..res100 = extractvalue { i64, i1 } %polly.access.add.97, 0 +; PREIR-NEXT: %polly.access.101 = getelementptr double, double* %polly.access.cast.92, i64 %polly.access.add..res100 +; PREIR-NEXT: %polly.access.cast.102 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.103 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; PREIR-NEXT: %polly.access.mul..obit104 = extractvalue { i64, i1 } %polly.access.mul.103, 1 +; PREIR-NEXT: %polly.overflow.state105 = or i1 %polly.overflow.state99, 
%polly.access.mul..obit104 +; PREIR-NEXT: %polly.access.mul..res106 = extractvalue { i64, i1 } %polly.access.mul.103, 0 +; PREIR-NEXT: %polly.access.add.107 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res106, i64 0) +; PREIR-NEXT: %polly.access.add..obit108 = extractvalue { i64, i1 } %polly.access.add.107, 1 +; PREIR-NEXT: %polly.overflow.state109 = or i1 %polly.overflow.state105, %polly.access.add..obit108 +; PREIR-NEXT: %polly.access.add..res110 = extractvalue { i64, i1 } %polly.access.add.107, 0 +; PREIR-NEXT: %polly.access.111 = getelementptr double, double* %polly.access.cast.102, i64 %polly.access.add..res110 +; PREIR-NEXT: %31 = ptrtoint double* %polly.access.101 to i64 +; PREIR-NEXT: %32 = ptrtoint double* %polly.access.111 to i64 +; PREIR-NEXT: %33 = icmp ule i64 %31, %32 +; PREIR-NEXT: %34 = or i1 %30, %33 +; PREIR-NEXT: %35 = and i1 %27, %34 +; PREIR-NEXT: %polly.rtc.overflown = xor i1 %polly.overflow.state109, true +; PREIR-NEXT: %polly.rtc.result = and i1 %35, %polly.rtc.overflown +; PREIR-NEXT: br i1 %polly.rtc.result, label %polly.start, label %for.cond1.preheader.i +; +; PREIR: for.cond1.preheader.i: ; preds = %polly.split_new_and_old, %for.inc8.i +; PREIR-NEXT: %indvars.iv19.i = phi i64 [ %indvars.iv.next20.i, %for.inc8.i ], [ 0, %polly.split_new_and_old ] +; PREIR-NEXT: %36 = trunc i64 %indvars.iv19.i to i32 +; PREIR-NEXT: %conv.i = sitofp i32 %36 to double +; PREIR-NEXT: br label %for.body3.i +; +; PREIR: for.body3.i: ; preds = %for.body3.i, %for.cond1.preheader.i +; PREIR-NEXT: %indvars.iv16.i = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next17.i, %for.body3.i ] +; PREIR-NEXT: %37 = trunc i64 %indvars.iv16.i to i32 +; PREIR-NEXT: %conv4.i = sitofp i32 %37 to double +; PREIR-NEXT: %mul.i = fmul double %conv.i, %conv4.i +; PREIR-NEXT: %div.i = fmul double %mul.i, 9.765625e-04 +; PREIR-NEXT: %arrayidx7.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv19.i, i64 
%indvars.iv16.i +; PREIR-NEXT: store double %div.i, double* %arrayidx7.i, align 8, !tbaa !1 +; PREIR-NEXT: %indvars.iv.next17.i = add nuw nsw i64 %indvars.iv16.i, 1 +; PREIR-NEXT: %exitcond18.i = icmp eq i64 %indvars.iv.next17.i, 1024 +; PREIR-NEXT: br i1 %exitcond18.i, label %for.inc8.i, label %for.body3.i +; +; PREIR: for.inc8.i: ; preds = %for.body3.i +; PREIR-NEXT: %indvars.iv.next20.i = add nuw nsw i64 %indvars.iv19.i, 1 +; PREIR-NEXT: %exitcond21.i = icmp eq i64 %indvars.iv.next20.i, 1024 +; PREIR-NEXT: br i1 %exitcond21.i, label %for.cond15.preheader.i.preheader, label %for.cond1.preheader.i +; +; PREIR: for.cond15.preheader.i.preheader: ; preds = %for.inc8.i +; PREIR-NEXT: %arraydecay4 = bitcast i8* %9 to [1024 x double]* +; PREIR-NEXT: br label %for.cond15.preheader.i +; +; PREIR: for.cond15.preheader.i: ; preds = %for.inc31.i, %for.cond15.preheader.i.preheader +; PREIR-NEXT: %indvars.iv13.i = phi i64 [ %indvars.iv.next14.i, %for.inc31.i ], [ 0, %for.cond15.preheader.i.preheader ] +; PREIR-NEXT: %38 = trunc i64 %indvars.iv13.i to i32 +; PREIR-NEXT: %conv19.i = sitofp i32 %38 to double +; PREIR-NEXT: br label %for.body18.i +; +; PREIR: for.body18.i: ; preds = %for.body18.i, %for.cond15.preheader.i +; PREIR-NEXT: %indvars.iv10.i = phi i64 [ 0, %for.cond15.preheader.i ], [ %indvars.iv.next11.i, %for.body18.i ] +; PREIR-NEXT: %39 = trunc i64 %indvars.iv10.i to i32 +; PREIR-NEXT: %conv20.i = sitofp i32 %39 to double +; PREIR-NEXT: %mul21.i = fmul double %conv19.i, %conv20.i +; PREIR-NEXT: %div23.i = fmul double %mul21.i, 9.765625e-04 +; PREIR-NEXT: %arrayidx27.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv13.i, i64 %indvars.iv10.i +; PREIR-NEXT: store double %div23.i, double* %arrayidx27.i, align 8, !tbaa !1 +; PREIR-NEXT: %indvars.iv.next11.i = add nuw nsw i64 %indvars.iv10.i, 1 +; PREIR-NEXT: %exitcond12.i = icmp eq i64 %indvars.iv.next11.i, 1024 +; PREIR-NEXT: br i1 %exitcond12.i, label %for.inc31.i, label 
%for.body18.i +; +; PREIR: for.inc31.i: ; preds = %for.body18.i +; PREIR-NEXT: %indvars.iv.next14.i = add nuw nsw i64 %indvars.iv13.i, 1 +; PREIR-NEXT: %exitcond15.i = icmp eq i64 %indvars.iv.next14.i, 1024 +; PREIR-NEXT: br i1 %exitcond15.i, label %for.cond38.preheader.i, label %for.cond15.preheader.i +; +; PREIR: for.cond38.preheader.i: ; preds = %for.inc54.i, %for.inc31.i +; PREIR-NEXT: %indvars.iv7.i = phi i64 [ %indvars.iv.next8.i, %for.inc54.i ], [ 0, %for.inc31.i ] +; PREIR-NEXT: %40 = trunc i64 %indvars.iv7.i to i32 +; PREIR-NEXT: %conv42.i = sitofp i32 %40 to double +; PREIR-NEXT: br label %for.body41.i +; +; PREIR: for.body41.i: ; preds = %for.body41.i, %for.cond38.preheader.i +; PREIR-NEXT: %indvars.iv.i = phi i64 [ 0, %for.cond38.preheader.i ], [ %indvars.iv.next.i, %for.body41.i ] +; PREIR-NEXT: %41 = trunc i64 %indvars.iv.i to i32 +; PREIR-NEXT: %conv43.i = sitofp i32 %41 to double +; PREIR-NEXT: %mul44.i = fmul double %conv42.i, %conv43.i +; PREIR-NEXT: %div46.i = fmul double %mul44.i, 9.765625e-04 +; PREIR-NEXT: %arrayidx50.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv7.i, i64 %indvars.iv.i +; PREIR-NEXT: store double %div46.i, double* %arrayidx50.i, align 8, !tbaa !1 +; PREIR-NEXT: %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 +; PREIR-NEXT: %exitcond.i = icmp eq i64 %indvars.iv.next.i, 1024 +; PREIR-NEXT: br i1 %exitcond.i, label %for.inc54.i, label %for.body41.i +; +; PREIR: for.inc54.i: ; preds = %for.body41.i +; PREIR-NEXT: %indvars.iv.next8.i = add nuw nsw i64 %indvars.iv7.i, 1 +; PREIR-NEXT: %exitcond9.i = icmp eq i64 %indvars.iv.next8.i, 1024 +; PREIR-NEXT: br i1 %exitcond9.i, label %for.cond1.preheader.i42, label %for.cond38.preheader.i +; +; PREIR: for.cond1.preheader.i42: ; preds = %for.inc26.i, %for.inc54.i +; PREIR-NEXT: %indvars.iv7.i41 = phi i64 [ %indvars.iv.next8.i48, %for.inc26.i ], [ 0, %for.inc54.i ] +; PREIR-NEXT: br label %for.body3.i44 +; +; PREIR: for.body3.i44: ; preds = 
%for.inc23.i, %for.cond1.preheader.i42 +; PREIR-NEXT: %indvars.iv4.i = phi i64 [ 0, %for.cond1.preheader.i42 ], [ %indvars.iv.next5.i, %for.inc23.i ] +; PREIR-NEXT: %arrayidx5.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv7.i41, i64 %indvars.iv4.i +; PREIR-NEXT: %42 = load double, double* %arrayidx5.i, align 8, !tbaa !1 +; PREIR-NEXT: %mul.i43 = fmul double %42, 2.123000e+03 +; PREIR-NEXT: store double %mul.i43, double* %arrayidx5.i, align 8, !tbaa !1 +; PREIR-NEXT: br label %for.body8.i +; +; PREIR: for.body8.i: ; preds = %for.body8.i, %for.body3.i44 +; PREIR-NEXT: %43 = phi double [ %mul.i43, %for.body3.i44 ], [ %add.i, %for.body8.i ] +; PREIR-NEXT: %indvars.iv.i45 = phi i64 [ 0, %for.body3.i44 ], [ %indvars.iv.next.i46, %for.body8.i ] +; PREIR-NEXT: %arrayidx12.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv7.i41, i64 %indvars.iv.i45 +; PREIR-NEXT: %44 = load double, double* %arrayidx12.i, align 8, !tbaa !1 +; PREIR-NEXT: %mul13.i = fmul double %44, 3.241200e+04 +; PREIR-NEXT: %arrayidx17.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv.i45, i64 %indvars.iv4.i +; PREIR-NEXT: %45 = load double, double* %arrayidx17.i, align 8, !tbaa !1 +; PREIR-NEXT: %mul18.i = fmul double %mul13.i, %45 +; PREIR-NEXT: %add.i = fadd double %43, %mul18.i +; PREIR-NEXT: store double %add.i, double* %arrayidx5.i, align 8, !tbaa !1 +; PREIR-NEXT: %indvars.iv.next.i46 = add nuw nsw i64 %indvars.iv.i45, 1 +; PREIR-NEXT: %exitcond.i47 = icmp eq i64 %indvars.iv.next.i46, 1024 +; PREIR-NEXT: br i1 %exitcond.i47, label %for.inc23.i, label %for.body8.i +; +; PREIR: for.inc23.i: ; preds = %for.body8.i +; PREIR-NEXT: %indvars.iv.next5.i = add nuw nsw i64 %indvars.iv4.i, 1 +; PREIR-NEXT: %exitcond6.i = icmp eq i64 %indvars.iv.next5.i, 1024 +; PREIR-NEXT: br i1 %exitcond6.i, label %for.inc26.i, label %for.body3.i44 +; +; PREIR: for.inc26.i: ; preds = %for.inc23.i +; 
PREIR-NEXT: %indvars.iv.next8.i48 = add nuw nsw i64 %indvars.iv7.i41, 1 +; PREIR-NEXT: %exitcond9.i49 = icmp eq i64 %indvars.iv.next8.i48, 1024 +; PREIR-NEXT: br i1 %exitcond9.i49, label %polly.merge_new_and_old, label %for.cond1.preheader.i42 +; +; PREIR: polly.merge_new_and_old: ; preds = %polly.exiting, %for.inc26.i +; PREIR-NEXT: br label %kernel_gemm.exit +; +; PREIR: kernel_gemm.exit: ; preds = %polly.merge_new_and_old +; PREIR-NEXT: %call.i = call noalias i8* @malloc(i64 16385) #7 +; PREIR-NEXT: %arrayidx.i = getelementptr inbounds i8, i8* %call.i, i64 16384 +; PREIR-NEXT: store i8 0, i8* %arrayidx.i, align 1, !tbaa !7 +; PREIR-NEXT: %scevgep209 = getelementptr i8, i8* %call.i, i64 1 +; PREIR-NEXT: %scevgep211 = getelementptr i8, i8* %call.i, i64 2 +; PREIR-NEXT: %scevgep213 = getelementptr i8, i8* %call.i, i64 3 +; PREIR-NEXT: %scevgep215 = getelementptr i8, i8* %call.i, i64 4 +; PREIR-NEXT: %scevgep217 = getelementptr i8, i8* %call.i, i64 5 +; PREIR-NEXT: %scevgep219 = getelementptr i8, i8* %call.i, i64 6 +; PREIR-NEXT: %scevgep221 = getelementptr i8, i8* %call.i, i64 7 +; PREIR-NEXT: %scevgep223 = getelementptr i8, i8* %call.i, i64 8 +; PREIR-NEXT: %scevgep225 = getelementptr i8, i8* %call.i, i64 9 +; PREIR-NEXT: %scevgep227 = getelementptr i8, i8* %call.i, i64 10 +; PREIR-NEXT: %scevgep229 = getelementptr i8, i8* %call.i, i64 11 +; PREIR-NEXT: %scevgep231 = getelementptr i8, i8* %call.i, i64 12 +; PREIR-NEXT: %scevgep233 = getelementptr i8, i8* %call.i, i64 13 +; PREIR-NEXT: %scevgep235 = getelementptr i8, i8* %call.i, i64 14 +; PREIR-NEXT: %scevgep237 = getelementptr i8, i8* %call.i, i64 15 +; PREIR-NEXT: br label %for.cond3.preheader.i +; +; PREIR: for.cond3.preheader.i: ; preds = %for.end.i, %kernel_gemm.exit +; PREIR-NEXT: %indvars.iv4.i50 = phi i64 [ 0, %kernel_gemm.exit ], [ %indvars.iv.next5.i54, %for.end.i ] +; PREIR-NEXT: %46 = shl i64 %indvars.iv4.i50, 0 +; PREIR-NEXT: %47 = add i64 %46, 0 +; PREIR-NEXT: br label %polly.split_new_and_old194 +; 
+; PREIR: polly.split_new_and_old194: ; preds = %for.cond3.preheader.i +; PREIR-NEXT: br i1 true, label %polly.start196, label %for.body6.i +; +; PREIR: for.body6.i: ; preds = %polly.split_new_and_old194, %for.body6.i +; PREIR-NEXT: %indvars.iv.i51 = phi i64 [ %indvars.iv.next.i52, %for.body6.i ], [ 0, %polly.split_new_and_old194 ] +; PREIR-NEXT: %arrayidx10.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv4.i50, i64 %indvars.iv.i51 +; PREIR-NEXT: %48 = bitcast double* %arrayidx10.i to i64* +; PREIR-NEXT: %49 = load i64, i64* %48, align 8, !tbaa !1 +; PREIR-NEXT: %50 = shl nsw i64 %indvars.iv.i51, 4 +; PREIR-NEXT: %block.sroa.0.0.extract.trunc138.i.i = trunc i64 %49 to i8 +; PREIR-NEXT: %and.i.i = and i8 %block.sroa.0.0.extract.trunc138.i.i, 15 +; PREIR-NEXT: %add.i.i = or i8 %and.i.i, 48 +; PREIR-NEXT: %add.ptr.i.i = getelementptr inbounds i8, i8* %call.i, i64 %50 +; PREIR-NEXT: store i8 %add.i.i, i8* %add.ptr.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr10.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 1 +; PREIR-NEXT: store i8 %add.i.i, i8* %add.ptr10.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %block.sroa.0.1.extract.shift.i.i = lshr i64 %49, 8 +; PREIR-NEXT: %conv13195.i.i = trunc i64 %block.sroa.0.1.extract.shift.i.i to i8 +; PREIR-NEXT: %and14.i.i = and i8 %conv13195.i.i, 15 +; PREIR-NEXT: %add15.i.i = or i8 %and14.i.i, 48 +; PREIR-NEXT: %add.ptr19.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 2 +; PREIR-NEXT: store i8 %add15.i.i, i8* %add.ptr19.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr28.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 3 +; PREIR-NEXT: store i8 %add15.i.i, i8* %add.ptr28.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %block.sroa.0.2.extract.shift.i.i = lshr i64 %49, 16 +; PREIR-NEXT: %conv31201.i.i = trunc i64 %block.sroa.0.2.extract.shift.i.i to i8 +; PREIR-NEXT: %and32.i.i = and i8 %conv31201.i.i, 15 +; PREIR-NEXT: %add33.i.i = or i8 %and32.i.i, 48 +; PREIR-NEXT: %add.ptr37.i.i = 
getelementptr inbounds i8, i8* %add.ptr.i.i, i64 4 +; PREIR-NEXT: store i8 %add33.i.i, i8* %add.ptr37.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr46.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 5 +; PREIR-NEXT: store i8 %add33.i.i, i8* %add.ptr46.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %.tr.i.i = trunc i64 %49 to i32 +; PREIR-NEXT: %sext204207.i.i = lshr i32 %.tr.i.i, 24 +; PREIR-NEXT: %and50.i.i = and i32 %sext204207.i.i, 15 +; PREIR-NEXT: %add51.i.i = or i32 %and50.i.i, 48 +; PREIR-NEXT: %conv52.i.i = trunc i32 %add51.i.i to i8 +; PREIR-NEXT: %add.ptr55.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 6 +; PREIR-NEXT: store i8 %conv52.i.i, i8* %add.ptr55.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr64.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 7 +; PREIR-NEXT: store i8 %conv52.i.i, i8* %add.ptr64.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %block.sroa.0.4.extract.shift.i.i = lshr i64 %49, 32 +; PREIR-NEXT: %conv67211.i.i = trunc i64 %block.sroa.0.4.extract.shift.i.i to i8 +; PREIR-NEXT: %and68.i.i = and i8 %conv67211.i.i, 15 +; PREIR-NEXT: %add69.i.i = or i8 %and68.i.i, 48 +; PREIR-NEXT: %add.ptr73.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 8 +; PREIR-NEXT: store i8 %add69.i.i, i8* %add.ptr73.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr82.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 9 +; PREIR-NEXT: store i8 %add69.i.i, i8* %add.ptr82.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %block.sroa.0.5.extract.shift.i.i = lshr i64 %49, 40 +; PREIR-NEXT: %conv85217.i.i = trunc i64 %block.sroa.0.5.extract.shift.i.i to i8 +; PREIR-NEXT: %and86.i.i = and i8 %conv85217.i.i, 15 +; PREIR-NEXT: %add87.i.i = or i8 %and86.i.i, 48 +; PREIR-NEXT: %add.ptr91.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 10 +; PREIR-NEXT: store i8 %add87.i.i, i8* %add.ptr91.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr100.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 11 +; PREIR-NEXT: store i8 %add87.i.i, i8* %add.ptr100.i.i, align 1, !tbaa !7 
+; PREIR-NEXT: %block.sroa.0.6.extract.shift.i.i = lshr i64 %49, 48 +; PREIR-NEXT: %conv103223.i.i = trunc i64 %block.sroa.0.6.extract.shift.i.i to i8 +; PREIR-NEXT: %and104.i.i = and i8 %conv103223.i.i, 15 +; PREIR-NEXT: %add105.i.i = or i8 %and104.i.i, 48 +; PREIR-NEXT: %add.ptr109.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 12 +; PREIR-NEXT: store i8 %add105.i.i, i8* %add.ptr109.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr118.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 13 +; PREIR-NEXT: store i8 %add105.i.i, i8* %add.ptr118.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %block.sroa.0.7.extract.shift.i.i = lshr i64 %49, 56 +; PREIR-NEXT: %conv121229.i.i = trunc i64 %block.sroa.0.7.extract.shift.i.i to i8 +; PREIR-NEXT: %and122.i.i = and i8 %conv121229.i.i, 15 +; PREIR-NEXT: %add123.i.i = or i8 %and122.i.i, 48 +; PREIR-NEXT: %add.ptr127.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 14 +; PREIR-NEXT: store i8 %add123.i.i, i8* %add.ptr127.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %add.ptr136.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 15 +; PREIR-NEXT: store i8 %add123.i.i, i8* %add.ptr136.i.i, align 1, !tbaa !7 +; PREIR-NEXT: %indvars.iv.next.i52 = add nuw nsw i64 %indvars.iv.i51, 1 +; PREIR-NEXT: %exitcond.i53 = icmp eq i64 %indvars.iv.next.i52, 1024 +; PREIR-NEXT: br i1 %exitcond.i53, label %polly.merge_new_and_old195, label %for.body6.i +; +; PREIR: polly.merge_new_and_old195: ; preds = %polly.exiting197, %for.body6.i +; PREIR-NEXT: br label %for.end.i +; +; PREIR: for.end.i: ; preds = %polly.merge_new_and_old195 +; PREIR-NEXT: %51 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; PREIR-NEXT: %call12.i = call i32 @fputs(i8* nonnull %call.i, %struct._IO_FILE* %51) #9 +; PREIR-NEXT: %indvars.iv.next5.i54 = add nuw nsw i64 %indvars.iv4.i50, 1 +; PREIR-NEXT: %exitcond6.i55 = icmp eq i64 %indvars.iv.next5.i54, 1024 +; PREIR-NEXT: br i1 %exitcond6.i55, label %print_array.exit, label %for.cond3.preheader.i +; +; 
PREIR: print_array.exit: ; preds = %for.end.i +; PREIR-NEXT: call void @free(i8* nonnull %call.i) #7 +; PREIR-NEXT: call void @free(i8* nonnull %1) #7 +; PREIR-NEXT: call void @free(i8* %5) #7 +; PREIR-NEXT: call void @free(i8* %9) #7 +; PREIR-NEXT: ret i32 0 +; +; PREIR: polly.start: ; preds = %polly.split_new_and_old +; PREIR-NEXT: br label %polly.loop_preheader +; +; PREIR: polly.loop_exit: ; preds = %polly.loop_exit114 +; PREIR-NEXT: br label %polly.loop_preheader121 +; +; PREIR: polly.loop_exit122: ; preds = %polly.loop_exit128 +; PREIR-NEXT: br label %polly.loop_preheader136 +; +; PREIR: polly.loop_exit137: ; preds = %polly.loop_exit143 +; PREIR-NEXT: br label %polly.loop_preheader151 +; +; PREIR: polly.loop_exit152: ; preds = %polly.loop_exit158 +; PREIR-NEXT: br label %polly.exiting +; +; PREIR: polly.exiting: ; preds = %polly.loop_exit152 +; PREIR-NEXT: br label %polly.merge_new_and_old +; +; PREIR: polly.loop_header: ; preds = %polly.loop_exit114, %polly.loop_preheader +; PREIR-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_exit114 ] +; PREIR-NEXT: br label %polly.stmt.for.cond1.preheader.i +; +; PREIR: polly.stmt.for.cond1.preheader.i: ; preds = %polly.loop_header +; PREIR-NEXT: %52 = trunc i64 %polly.indvar to i32 +; PREIR-NEXT: %p_conv.i = sitofp i32 %52 to double +; PREIR-NEXT: store double %p_conv.i, double* %conv.i.s2a +; PREIR-NEXT: br label %polly.loop_preheader113 +; +; PREIR: polly.loop_exit114: ; preds = %polly.stmt.for.body3.i +; PREIR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; PREIR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit +; +; PREIR: polly.loop_preheader: ; preds = %polly.start +; PREIR-NEXT: br label %polly.loop_header +; +; PREIR: polly.loop_header112: ; preds = %polly.stmt.for.body3.i, %polly.loop_preheader113 +; PREIR-NEXT: %polly.indvar115 = phi i64 [ 0, 
%polly.loop_preheader113 ], [ %polly.indvar_next116, %polly.stmt.for.body3.i ] +; PREIR-NEXT: br label %polly.stmt.for.body3.i +; +; PREIR: polly.stmt.for.body3.i: ; preds = %polly.loop_header112 +; PREIR-NEXT: %conv.i.s2a.reload = load double, double* %conv.i.s2a +; PREIR-NEXT: %53 = trunc i64 %polly.indvar115 to i32 +; PREIR-NEXT: %p_conv4.i = sitofp i32 %53 to double +; PREIR-NEXT: %p_mul.i = fmul double %conv.i.s2a.reload, %p_conv4.i +; PREIR-NEXT: %p_div.i = fmul double %p_mul.i, 9.765625e-04 +; PREIR-NEXT: %54 = shl i64 %polly.indvar115, 3 +; PREIR-NEXT: %scevgep118 = getelementptr i8, i8* %scevgep, i64 %54 +; PREIR-NEXT: %scevgep118119 = bitcast i8* %scevgep118 to double* +; PREIR-NEXT: store double %p_div.i, double* %scevgep118119, align 8, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %polly.indvar_next116 = add nsw i64 %polly.indvar115, 1 +; PREIR-NEXT: %polly.loop_cond117 = icmp sle i64 %polly.indvar115, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond117, label %polly.loop_header112, label %polly.loop_exit114 +; +; PREIR: polly.loop_preheader113: ; preds = %polly.stmt.for.cond1.preheader.i +; PREIR-NEXT: %55 = shl i64 %polly.indvar, 13 +; PREIR-NEXT: %scevgep = getelementptr i8, i8* %1, i64 %55 +; PREIR-NEXT: br label %polly.loop_header112 +; +; PREIR: polly.loop_header120: ; preds = %polly.loop_exit128, %polly.loop_preheader121 +; PREIR-NEXT: %polly.indvar123 = phi i64 [ 0, %polly.loop_preheader121 ], [ %polly.indvar_next124, %polly.loop_exit128 ] +; PREIR-NEXT: br label %polly.stmt.for.cond15.preheader.i +; +; PREIR: polly.stmt.for.cond15.preheader.i: ; preds = %polly.loop_header120 +; PREIR-NEXT: %56 = trunc i64 %polly.indvar123 to i32 +; PREIR-NEXT: %p_conv19.i = sitofp i32 %56 to double +; PREIR-NEXT: store double %p_conv19.i, double* %conv19.i.s2a +; PREIR-NEXT: br label %polly.loop_preheader127 +; +; PREIR: polly.loop_exit128: ; preds = %polly.stmt.for.body18.i +; PREIR-NEXT: %polly.indvar_next124 = add nsw i64 %polly.indvar123, 1 +; PREIR-NEXT: 
%polly.loop_cond125 = icmp sle i64 %polly.indvar123, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond125, label %polly.loop_header120, label %polly.loop_exit122 +; +; PREIR: polly.loop_preheader121: ; preds = %polly.loop_exit +; PREIR-NEXT: br label %polly.loop_header120 +; +; PREIR: polly.loop_header126: ; preds = %polly.stmt.for.body18.i, %polly.loop_preheader127 +; PREIR-NEXT: %polly.indvar129 = phi i64 [ 0, %polly.loop_preheader127 ], [ %polly.indvar_next130, %polly.stmt.for.body18.i ] +; PREIR-NEXT: br label %polly.stmt.for.body18.i +; +; PREIR: polly.stmt.for.body18.i: ; preds = %polly.loop_header126 +; PREIR-NEXT: %conv19.i.s2a.reload = load double, double* %conv19.i.s2a +; PREIR-NEXT: %57 = trunc i64 %polly.indvar129 to i32 +; PREIR-NEXT: %p_conv20.i = sitofp i32 %57 to double +; PREIR-NEXT: %p_mul21.i = fmul double %conv19.i.s2a.reload, %p_conv20.i +; PREIR-NEXT: %p_div23.i = fmul double %p_mul21.i, 9.765625e-04 +; PREIR-NEXT: %58 = shl i64 %polly.indvar129, 3 +; PREIR-NEXT: %scevgep133 = getelementptr i8, i8* %scevgep132, i64 %58 +; PREIR-NEXT: %scevgep133134 = bitcast i8* %scevgep133 to double* +; PREIR-NEXT: store double %p_div23.i, double* %scevgep133134, align 8, !alias.scope !16, !noalias !17 +; PREIR-NEXT: %polly.indvar_next130 = add nsw i64 %polly.indvar129, 1 +; PREIR-NEXT: %polly.loop_cond131 = icmp sle i64 %polly.indvar129, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond131, label %polly.loop_header126, label %polly.loop_exit128 +; +; PREIR: polly.loop_preheader127: ; preds = %polly.stmt.for.cond15.preheader.i +; PREIR-NEXT: %59 = shl i64 %polly.indvar123, 13 +; PREIR-NEXT: %scevgep132 = getelementptr i8, i8* %5, i64 %59 +; PREIR-NEXT: br label %polly.loop_header126 +; +; PREIR: polly.loop_header135: ; preds = %polly.loop_exit143, %polly.loop_preheader136 +; PREIR-NEXT: %polly.indvar138 = phi i64 [ 0, %polly.loop_preheader136 ], [ %polly.indvar_next139, %polly.loop_exit143 ] +; PREIR-NEXT: br label %polly.stmt.for.cond38.preheader.i +; +; PREIR: 
polly.stmt.for.cond38.preheader.i: ; preds = %polly.loop_header135 +; PREIR-NEXT: %60 = trunc i64 %polly.indvar138 to i32 +; PREIR-NEXT: %p_conv42.i = sitofp i32 %60 to double +; PREIR-NEXT: store double %p_conv42.i, double* %conv42.i.s2a +; PREIR-NEXT: br label %polly.loop_preheader142 +; +; PREIR: polly.loop_exit143: ; preds = %polly.stmt.for.body41.i +; PREIR-NEXT: %polly.indvar_next139 = add nsw i64 %polly.indvar138, 1 +; PREIR-NEXT: %polly.loop_cond140 = icmp sle i64 %polly.indvar138, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond140, label %polly.loop_header135, label %polly.loop_exit137 +; +; PREIR: polly.loop_preheader136: ; preds = %polly.loop_exit122 +; PREIR-NEXT: br label %polly.loop_header135 +; +; PREIR: polly.loop_header141: ; preds = %polly.stmt.for.body41.i, %polly.loop_preheader142 +; PREIR-NEXT: %polly.indvar144 = phi i64 [ 0, %polly.loop_preheader142 ], [ %polly.indvar_next145, %polly.stmt.for.body41.i ] +; PREIR-NEXT: br label %polly.stmt.for.body41.i +; +; PREIR: polly.stmt.for.body41.i: ; preds = %polly.loop_header141 +; PREIR-NEXT: %conv42.i.s2a.reload = load double, double* %conv42.i.s2a +; PREIR-NEXT: %61 = trunc i64 %polly.indvar144 to i32 +; PREIR-NEXT: %p_conv43.i = sitofp i32 %61 to double +; PREIR-NEXT: %p_mul44.i = fmul double %conv42.i.s2a.reload, %p_conv43.i +; PREIR-NEXT: %p_div46.i = fmul double %p_mul44.i, 9.765625e-04 +; PREIR-NEXT: %62 = shl i64 %polly.indvar144, 3 +; PREIR-NEXT: %scevgep148 = getelementptr i8, i8* %scevgep147, i64 %62 +; PREIR-NEXT: %scevgep148149 = bitcast i8* %scevgep148 to double* +; PREIR-NEXT: store double %p_div46.i, double* %scevgep148149, align 8, !alias.scope !11, !noalias !18 +; PREIR-NEXT: %polly.indvar_next145 = add nsw i64 %polly.indvar144, 1 +; PREIR-NEXT: %polly.loop_cond146 = icmp sle i64 %polly.indvar144, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond146, label %polly.loop_header141, label %polly.loop_exit143 +; +; PREIR: polly.loop_preheader142: ; preds = %polly.stmt.for.cond38.preheader.i +; 
PREIR-NEXT: %63 = shl i64 %polly.indvar138, 13 +; PREIR-NEXT: %scevgep147 = getelementptr i8, i8* %9, i64 %63 +; PREIR-NEXT: br label %polly.loop_header141 +; +; PREIR: polly.loop_header150: ; preds = %polly.loop_exit158, %polly.loop_preheader151 +; PREIR-NEXT: %polly.indvar153 = phi i64 [ 0, %polly.loop_preheader151 ], [ %polly.indvar_next154, %polly.loop_exit158 ] +; PREIR-NEXT: br label %polly.loop_preheader157 +; +; PREIR: polly.loop_exit158: ; preds = %polly.loop_exit171 +; PREIR-NEXT: %polly.indvar_next154 = add nsw i64 %polly.indvar153, 1 +; PREIR-NEXT: %polly.loop_cond155 = icmp sle i64 %polly.indvar153, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond155, label %polly.loop_header150, label %polly.loop_exit152 +; +; PREIR: polly.loop_preheader151: ; preds = %polly.loop_exit137 +; PREIR-NEXT: br label %polly.loop_header150 +; +; PREIR: polly.loop_header156: ; preds = %polly.loop_exit171, %polly.loop_preheader157 +; PREIR-NEXT: %polly.indvar159 = phi i64 [ 0, %polly.loop_preheader157 ], [ %polly.indvar_next160, %polly.loop_exit171 ] +; PREIR-NEXT: br label %polly.stmt.for.body3.i44 +; +; PREIR: polly.stmt.for.body3.i44: ; preds = %polly.loop_header156 +; PREIR-NEXT: %64 = shl i64 %polly.indvar159, 3 +; PREIR-NEXT: %scevgep163 = getelementptr i8, i8* %scevgep162, i64 %64 +; PREIR-NEXT: %scevgep163164 = bitcast i8* %scevgep163 to double* +; PREIR-NEXT: %_p_scalar_ = load double, double* %scevgep163164, align 8, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %p_mul.i43 = fmul double %_p_scalar_, 2.123000e+03 +; PREIR-NEXT: store double %p_mul.i43, double* %scevgep163164, align 8, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %polly.access.cast.165 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.166 = mul nsw i64 %polly.indvar153, 1024 +; PREIR-NEXT: %polly.access.add.167 = add nsw i64 %polly.access.mul.166, %polly.indvar159 +; PREIR-NEXT: %polly.access.168 = getelementptr double, double* %polly.access.cast.165, i64 %polly.access.add.167 +; PREIR-NEXT: store 
double %p_mul.i43, double* %polly.access.168, !alias.scope !8, !noalias !10 +; PREIR-NEXT: br label %polly.loop_preheader170 +; +; PREIR: polly.loop_exit171: ; preds = %polly.stmt.for.body8.i +; PREIR-NEXT: %polly.indvar_next160 = add nsw i64 %polly.indvar159, 1 +; PREIR-NEXT: %polly.loop_cond161 = icmp sle i64 %polly.indvar159, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond161, label %polly.loop_header156, label %polly.loop_exit158 +; +; PREIR: polly.loop_preheader157: ; preds = %polly.loop_header150 +; PREIR-NEXT: %65 = shl i64 %polly.indvar153, 13 +; PREIR-NEXT: %scevgep162 = getelementptr i8, i8* %1, i64 %65 +; PREIR-NEXT: %scevgep179 = getelementptr i8, i8* %5, i64 %65 +; PREIR-NEXT: %scevgep187 = getelementptr i8, i8* %1, i64 %65 +; PREIR-NEXT: br label %polly.loop_header156 +; +; PREIR: polly.loop_header169: ; preds = %polly.stmt.for.body8.i, %polly.loop_preheader170 +; PREIR-NEXT: %polly.indvar172 = phi i64 [ 0, %polly.loop_preheader170 ], [ %polly.indvar_next173, %polly.stmt.for.body8.i ] +; PREIR-NEXT: br label %polly.stmt.for.body8.i +; +; PREIR: polly.stmt.for.body8.i: ; preds = %polly.loop_header169 +; PREIR-NEXT: %polly.access.cast.175 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.176 = mul nsw i64 %polly.indvar153, 1024 +; PREIR-NEXT: %polly.access.add.177 = add nsw i64 %polly.access.mul.176, %polly.indvar159 +; PREIR-NEXT: %polly.access.178 = getelementptr double, double* %polly.access.cast.175, i64 %polly.access.add.177 +; PREIR-NEXT: %polly.access.178.reload = load double, double* %polly.access.178, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %66 = shl i64 %polly.indvar172, 3 +; PREIR-NEXT: %scevgep180 = getelementptr i8, i8* %scevgep179, i64 %66 +; PREIR-NEXT: %scevgep180181 = bitcast i8* %scevgep180 to double* +; PREIR-NEXT: %_p_scalar_182 = load double, double* %scevgep180181, align 8, !alias.scope !16, !noalias !17 +; PREIR-NEXT: %p_mul13.i = fmul double %_p_scalar_182, 3.241200e+04 +; PREIR-NEXT: %67 = shl i64 %polly.indvar172, 13 
+; PREIR-NEXT: %scevgep184 = getelementptr i8, i8* %scevgep183, i64 %67 +; PREIR-NEXT: %scevgep184185 = bitcast i8* %scevgep184 to double* +; PREIR-NEXT: %_p_scalar_186 = load double, double* %scevgep184185, align 8, !alias.scope !11, !noalias !18 +; PREIR-NEXT: %p_mul18.i = fmul double %p_mul13.i, %_p_scalar_186 +; PREIR-NEXT: %p_add.i = fadd double %polly.access.178.reload, %p_mul18.i +; PREIR-NEXT: store double %p_add.i, double* %scevgep188189, align 8, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %polly.access.cast.190 = bitcast i8* %1 to double* +; PREIR-NEXT: %polly.access.mul.191 = mul nsw i64 %polly.indvar153, 1024 +; PREIR-NEXT: %polly.access.add.192 = add nsw i64 %polly.access.mul.191, %polly.indvar159 +; PREIR-NEXT: %polly.access.193 = getelementptr double, double* %polly.access.cast.190, i64 %polly.access.add.192 +; PREIR-NEXT: store double %p_add.i, double* %polly.access.193, !alias.scope !8, !noalias !10 +; PREIR-NEXT: %polly.indvar_next173 = add nsw i64 %polly.indvar172, 1 +; PREIR-NEXT: %polly.loop_cond174 = icmp sle i64 %polly.indvar172, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond174, label %polly.loop_header169, label %polly.loop_exit171 +; +; PREIR: polly.loop_preheader170: ; preds = %polly.stmt.for.body3.i44 +; PREIR-NEXT: %68 = shl i64 %polly.indvar159, 3 +; PREIR-NEXT: %scevgep183 = getelementptr i8, i8* %9, i64 %68 +; PREIR-NEXT: %scevgep188 = getelementptr i8, i8* %scevgep187, i64 %68 +; PREIR-NEXT: %scevgep188189 = bitcast i8* %scevgep188 to double* +; PREIR-NEXT: br label %polly.loop_header169 +; +; PREIR: polly.start196: ; preds = %polly.split_new_and_old194 +; PREIR-NEXT: br label %polly.loop_preheader199 +; +; PREIR: polly.loop_exit200: ; preds = %polly.stmt.for.body6.i +; PREIR-NEXT: br label %polly.exiting197 +; +; PREIR: polly.exiting197: ; preds = %polly.loop_exit200 +; PREIR-NEXT: br label %polly.merge_new_and_old195 +; +; PREIR: polly.loop_header198: ; preds = %polly.stmt.for.body6.i, %polly.loop_preheader199 +; PREIR-NEXT: 
%polly.indvar201 = phi i64 [ 0, %polly.loop_preheader199 ], [ %polly.indvar_next202, %polly.stmt.for.body6.i ] +; PREIR-NEXT: br label %polly.stmt.for.body6.i +; +; PREIR: polly.stmt.for.body6.i: ; preds = %polly.loop_header198 +; PREIR-NEXT: %69 = shl i64 %polly.indvar201, 3 +; PREIR-NEXT: %scevgep205 = getelementptr i8, i8* %scevgep204, i64 %69 +; PREIR-NEXT: %scevgep205206 = bitcast i8* %scevgep205 to i64* +; PREIR-NEXT: %_p_scalar_207 = load i64, i64* %scevgep205206, align 8, !alias.scope !19, !noalias !21 +; PREIR-NEXT: %p_block.sroa.0.0.extract.trunc138.i.i = trunc i64 %_p_scalar_207 to i8 +; PREIR-NEXT: %p_and.i.i = and i8 %p_block.sroa.0.0.extract.trunc138.i.i, 15 +; PREIR-NEXT: %p_add.i.i = or i8 %p_and.i.i, 48 +; PREIR-NEXT: %70 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep208 = getelementptr i8, i8* %call.i, i64 %70 +; PREIR-NEXT: store i8 %p_add.i.i, i8* %scevgep208, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep210 = getelementptr i8, i8* %scevgep209, i64 %70 +; PREIR-NEXT: store i8 %p_add.i.i, i8* %scevgep210, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.1.extract.shift.i.i = lshr i64 %_p_scalar_207, 8 +; PREIR-NEXT: %p_conv13195.i.i = trunc i64 %p_block.sroa.0.1.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and14.i.i = and i8 %p_conv13195.i.i, 15 +; PREIR-NEXT: %p_add15.i.i = or i8 %p_and14.i.i, 48 +; PREIR-NEXT: %71 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep212 = getelementptr i8, i8* %scevgep211, i64 %71 +; PREIR-NEXT: store i8 %p_add15.i.i, i8* %scevgep212, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep214 = getelementptr i8, i8* %scevgep213, i64 %71 +; PREIR-NEXT: store i8 %p_add15.i.i, i8* %scevgep214, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.2.extract.shift.i.i = lshr i64 %_p_scalar_207, 16 +; PREIR-NEXT: %p_conv31201.i.i = trunc i64 %p_block.sroa.0.2.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and32.i.i = and i8 %p_conv31201.i.i, 15 +; 
PREIR-NEXT: %p_add33.i.i = or i8 %p_and32.i.i, 48 +; PREIR-NEXT: %72 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep216 = getelementptr i8, i8* %scevgep215, i64 %72 +; PREIR-NEXT: store i8 %p_add33.i.i, i8* %scevgep216, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep218 = getelementptr i8, i8* %scevgep217, i64 %72 +; PREIR-NEXT: store i8 %p_add33.i.i, i8* %scevgep218, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_.tr.i.i = trunc i64 %_p_scalar_207 to i32 +; PREIR-NEXT: %p_sext204207.i.i = lshr i32 %p_.tr.i.i, 24 +; PREIR-NEXT: %p_and50.i.i = and i32 %p_sext204207.i.i, 15 +; PREIR-NEXT: %p_add51.i.i = or i32 %p_and50.i.i, 48 +; PREIR-NEXT: %p_conv52.i.i = trunc i32 %p_add51.i.i to i8 +; PREIR-NEXT: %73 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep220 = getelementptr i8, i8* %scevgep219, i64 %73 +; PREIR-NEXT: store i8 %p_conv52.i.i, i8* %scevgep220, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep222 = getelementptr i8, i8* %scevgep221, i64 %73 +; PREIR-NEXT: store i8 %p_conv52.i.i, i8* %scevgep222, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.4.extract.shift.i.i = lshr i64 %_p_scalar_207, 32 +; PREIR-NEXT: %p_conv67211.i.i = trunc i64 %p_block.sroa.0.4.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and68.i.i = and i8 %p_conv67211.i.i, 15 +; PREIR-NEXT: %p_add69.i.i = or i8 %p_and68.i.i, 48 +; PREIR-NEXT: %74 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep224 = getelementptr i8, i8* %scevgep223, i64 %74 +; PREIR-NEXT: store i8 %p_add69.i.i, i8* %scevgep224, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep226 = getelementptr i8, i8* %scevgep225, i64 %74 +; PREIR-NEXT: store i8 %p_add69.i.i, i8* %scevgep226, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.5.extract.shift.i.i = lshr i64 %_p_scalar_207, 40 +; PREIR-NEXT: %p_conv85217.i.i = trunc i64 %p_block.sroa.0.5.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and86.i.i = and i8 %p_conv85217.i.i, 15 +; 
PREIR-NEXT: %p_add87.i.i = or i8 %p_and86.i.i, 48 +; PREIR-NEXT: %75 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep228 = getelementptr i8, i8* %scevgep227, i64 %75 +; PREIR-NEXT: store i8 %p_add87.i.i, i8* %scevgep228, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep230 = getelementptr i8, i8* %scevgep229, i64 %75 +; PREIR-NEXT: store i8 %p_add87.i.i, i8* %scevgep230, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.6.extract.shift.i.i = lshr i64 %_p_scalar_207, 48 +; PREIR-NEXT: %p_conv103223.i.i = trunc i64 %p_block.sroa.0.6.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and104.i.i = and i8 %p_conv103223.i.i, 15 +; PREIR-NEXT: %p_add105.i.i = or i8 %p_and104.i.i, 48 +; PREIR-NEXT: %76 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep232 = getelementptr i8, i8* %scevgep231, i64 %76 +; PREIR-NEXT: store i8 %p_add105.i.i, i8* %scevgep232, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep234 = getelementptr i8, i8* %scevgep233, i64 %76 +; PREIR-NEXT: store i8 %p_add105.i.i, i8* %scevgep234, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %p_block.sroa.0.7.extract.shift.i.i = lshr i64 %_p_scalar_207, 56 +; PREIR-NEXT: %p_conv121229.i.i = trunc i64 %p_block.sroa.0.7.extract.shift.i.i to i8 +; PREIR-NEXT: %p_and122.i.i = and i8 %p_conv121229.i.i, 15 +; PREIR-NEXT: %p_add123.i.i = or i8 %p_and122.i.i, 48 +; PREIR-NEXT: %77 = shl i64 %polly.indvar201, 4 +; PREIR-NEXT: %scevgep236 = getelementptr i8, i8* %scevgep235, i64 %77 +; PREIR-NEXT: store i8 %p_add123.i.i, i8* %scevgep236, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %scevgep238 = getelementptr i8, i8* %scevgep237, i64 %77 +; PREIR-NEXT: store i8 %p_add123.i.i, i8* %scevgep238, align 1, !alias.scope !22, !noalias !23 +; PREIR-NEXT: %polly.indvar_next202 = add nsw i64 %polly.indvar201, 1 +; PREIR-NEXT: %polly.loop_cond203 = icmp sle i64 %polly.indvar201, 1022 +; PREIR-NEXT: br i1 %polly.loop_cond203, label %polly.loop_header198, label 
%polly.loop_exit200 +; +; PREIR: polly.loop_preheader199: ; preds = %polly.start196 +; PREIR-NEXT: %78 = shl i64 %47, 13 +; PREIR-NEXT: %scevgep204 = getelementptr i8, i8* %1, i64 %78 +; PREIR-NEXT: br label %polly.loop_header198 +; PREIR-NEXT: } +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare i32 @posix_memalign(i8**, i64, i64) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: noreturn nounwind +; PREIR-NEXT: declare void @exit(i32) local_unnamed_addr #6 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare noalias i8* @malloc(i64) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_unnamed_addr #2 +; +; PREIR: ; Function Attrs: nounwind +; PREIR-NEXT: declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #7 +; +; PREIR: ; Function Attrs: nounwind readnone +; PREIR-NEXT: declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #8 +; +; PREIR: ; Function Attrs: nounwind readnone +; PREIR-NEXT: declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #8 +; +; PREIR: attributes #0 = { norecurse nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #1 = { argmemonly nounwind } +; PREIR-NEXT: attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #3 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #4 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #5 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #6 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; PREIR-NEXT: attributes #7 = { nounwind } +; PREIR-NEXT: attributes #8 = { nounwind readnone } +; PREIR-NEXT: attributes #9 = { cold nounwind } +; PREIR-NEXT: attributes #10 = { noreturn nounwind } +; +; PREIR: 
!llvm.ident = !{!0} +; +; PREIR: !0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} +; PREIR-NEXT: !1 = !{!2, !2, i64 0} +; PREIR-NEXT: !2 = !{!"double", !3, i64 0} +; PREIR-NEXT: !3 = !{!"omnipotent char", !4, i64 0} +; PREIR-NEXT: !4 = !{!"Simple C/C++ TBAA"} +; PREIR-NEXT: !5 = !{!6, !6, i64 0} +; PREIR-NEXT: !6 = !{!"any pointer", !3, i64 0} +; PREIR-NEXT: !7 = !{!3, !3, i64 0} +; PREIR-NEXT: !8 = distinct !{!8, !9, !"polly.alias.scope."} +; PREIR-NEXT: !9 = distinct !{!9, !"polly.alias.scope.domain"} +; PREIR-NEXT: !10 = !{!11, !12, !13, !14, !15, !16} +; PREIR-NEXT: !11 = distinct !{!11, !9, !"polly.alias.scope."} +; PREIR-NEXT: !12 = distinct !{!12, !9, !"polly.alias.scope.conv42.i"} +; PREIR-NEXT: !13 = distinct !{!13, !9, !"polly.alias.scope.conv.i"} +; PREIR-NEXT: !14 = distinct !{!14, !9, !"polly.alias.scope."} +; PREIR-NEXT: !15 = distinct !{!15, !9, !"polly.alias.scope.conv19.i"} +; PREIR-NEXT: !16 = distinct !{!16, !9, !"polly.alias.scope."} +; PREIR-NEXT: !17 = !{!11, !12, !13, !14, !8, !15} +; PREIR-NEXT: !18 = !{!12, !13, !14, !8, !15, !16} +; PREIR-NEXT: !19 = distinct !{!19, !20, !"polly.alias.scope."} +; PREIR-NEXT: !20 = distinct !{!20, !"polly.alias.scope.domain"} +; PREIR-NEXT: !21 = !{!22} +; PREIR-NEXT: !22 = distinct !{!22, !20, !"polly.alias.scope.call.i"} +; PREIR-NEXT: !23 = !{!19} + +; IR: ; ModuleID = '' +; IR-NEXT: source_filename = "/mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/gemm/gemm.c" +; IR-NEXT: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +; IR-NEXT: target triple = "x86_64-unknown-linux-gnu" +; +; IR: %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +; IR-NEXT: %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +; +; IR: 
@polybench_papi_counters_threadid = local_unnamed_addr global i32 0, align 4 +; IR-NEXT: @polybench_program_total_flops = local_unnamed_addr global double 0.000000e+00, align 8 +; IR-NEXT: @polybench_t_start = common local_unnamed_addr global double 0.000000e+00, align 8 +; IR-NEXT: @polybench_t_end = common local_unnamed_addr global double 0.000000e+00, align 8 +; IR-NEXT: @.str = private unnamed_addr constant [7 x i8] c"%0.6f\0A\00", align 1 +; IR-NEXT: @polybench_c_start = common local_unnamed_addr global i64 0, align 8 +; IR-NEXT: @polybench_c_end = common local_unnamed_addr global i64 0, align 8 +; IR-NEXT: @stderr = external local_unnamed_addr global %struct._IO_FILE*, align 8 +; IR-NEXT: @.str.1 = private unnamed_addr constant [51 x i8] c"[PolyBench] posix_memalign: cannot allocate memory\00", align 1 +; +; IR: ; Function Attrs: norecurse nounwind readnone uwtable +; IR-NEXT: define void @polybench_flush_cache() local_unnamed_addr #0 { +; IR-NEXT: entry: +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: ret void +; IR-NEXT: } +; +; IR: ; Function Attrs: argmemonly nounwind +; IR-NEXT: declare void @llvm.lifetime.start(i64, i8* nocapture) #1 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare noalias i8* @calloc(i64, i64) local_unnamed_addr #2 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare void @free(i8* nocapture) local_unnamed_addr #2 +; +; IR: ; Function Attrs: argmemonly nounwind +; IR-NEXT: declare void @llvm.lifetime.end(i64, i8* nocapture) #1 +; +; IR: ; Function Attrs: norecurse nounwind readnone uwtable +; IR-NEXT: define void @polybench_prepare_instruments() local_unnamed_addr #0 { +; IR-NEXT: entry: +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: ret void +; IR-NEXT: } +; +; IR: ; Function Attrs: norecurse nounwind uwtable +; IR-NEXT: define void @polybench_timer_start() local_unnamed_addr #3 { +; IR-NEXT: entry: +; IR-NEXT: br label %entry.split +; +; IR: 
entry.split: ; preds = %entry +; IR-NEXT: store double 0.000000e+00, double* @polybench_t_start, align 8, !tbaa !1 +; IR-NEXT: ret void +; IR-NEXT: } +; +; IR: ; Function Attrs: norecurse nounwind uwtable +; IR-NEXT: define void @polybench_timer_stop() local_unnamed_addr #3 { +; IR-NEXT: entry: +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: store double 0.000000e+00, double* @polybench_t_end, align 8, !tbaa !1 +; IR-NEXT: ret void +; IR-NEXT: } +; +; IR: ; Function Attrs: nounwind uwtable +; IR-NEXT: define void @polybench_timer_print() local_unnamed_addr #4 { +; IR-NEXT: entry: +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: %0 = load double, double* @polybench_t_end, align 8, !tbaa !1 +; IR-NEXT: %1 = load double, double* @polybench_t_start, align 8, !tbaa !1 +; IR-NEXT: %sub = fsub double %0, %1 +; IR-NEXT: %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), double %sub) +; IR-NEXT: ret void +; IR-NEXT: } +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #2 +; +; IR: ; Function Attrs: nounwind uwtable +; IR-NEXT: define i8* @polybench_alloc_data(i64 %n, i32 %elt_size) local_unnamed_addr #4 { +; IR-NEXT: entry: +; IR-NEXT: %new.i = alloca i8*, align 8 +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: %conv = sext i32 %elt_size to i64 +; IR-NEXT: %mul = mul i64 %conv, %n +; IR-NEXT: %0 = bitcast i8** %new.i to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) #7 +; IR-NEXT: store i8* null, i8** %new.i, align 8, !tbaa !5 +; IR-NEXT: %call.i = call i32 @posix_memalign(i8** nonnull %new.i, i64 32, i64 %mul) #7 +; IR-NEXT: %1 = load i8*, i8** %new.i, align 8, !tbaa !5 +; IR-NEXT: %tobool.i = icmp eq i8* %1, null +; IR-NEXT: %tobool1.i = icmp ne i32 %call.i, 0 +; IR-NEXT: %or.cond.i = or i1 %tobool1.i, %tobool.i +; IR-NEXT: br i1 %or.cond.i, label %if.then.i, label %xmalloc.exit +; +; IR: if.then.i: ; preds = %entry.split +; IR-NEXT: %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; IR-NEXT: %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #9 +; IR-NEXT: call void @exit(i32 1) #10 +; IR-NEXT: unreachable +; +; IR: xmalloc.exit: ; preds = %entry.split +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %0) #7 +; IR-NEXT: ret i8* %1 +; IR-NEXT: } +; +; IR: ; Function Attrs: nounwind uwtable +; IR-NEXT: define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #5 { +; IR-NEXT: entry: +; IR-NEXT: %conv19.i.s2a = alloca double +; IR-NEXT: %conv.i.s2a = alloca double +; IR-NEXT: %conv42.i.s2a = alloca double +; IR-NEXT: %new.i.i34 = alloca i8*, align 8 +; IR-NEXT: %new.i.i27 = alloca i8*, align 8 +; IR-NEXT: %new.i.i = alloca i8*, align 8 +; IR-NEXT: br label %entry.split +; +; IR: entry.split: ; preds = %entry +; IR-NEXT: %0 = bitcast i8** %new.i.i to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) #7 +; IR-NEXT: 
store i8* null, i8** %new.i.i, align 8, !tbaa !5 +; IR-NEXT: %call.i.i = call i32 @posix_memalign(i8** nonnull %new.i.i, i64 32, i64 8388608) #7 +; IR-NEXT: %1 = load i8*, i8** %new.i.i, align 8, !tbaa !5 +; IR-NEXT: %tobool.i.i = icmp eq i8* %1, null +; IR-NEXT: %tobool1.i.i = icmp ne i32 %call.i.i, 0 +; IR-NEXT: %or.cond.i.i = or i1 %tobool1.i.i, %tobool.i.i +; IR-NEXT: br i1 %or.cond.i.i, label %if.then.i.i, label %polybench_alloc_data.exit +; +; IR: if.then.i.i: ; preds = %entry.split +; IR-NEXT: %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; IR-NEXT: %3 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %2) #9 +; IR-NEXT: call void @exit(i32 1) #10 +; IR-NEXT: unreachable +; +; IR: polybench_alloc_data.exit: ; preds = %entry.split +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %0) #7 +; IR-NEXT: %4 = bitcast i8** %new.i.i27 to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %4) #7 +; IR-NEXT: store i8* null, i8** %new.i.i27, align 8, !tbaa !5 +; IR-NEXT: %call.i.i28 = call i32 @posix_memalign(i8** nonnull %new.i.i27, i64 32, i64 8388608) #7 +; IR-NEXT: %5 = load i8*, i8** %new.i.i27, align 8, !tbaa !5 +; IR-NEXT: %tobool.i.i29 = icmp eq i8* %5, null +; IR-NEXT: %tobool1.i.i30 = icmp ne i32 %call.i.i28, 0 +; IR-NEXT: %or.cond.i.i31 = or i1 %tobool1.i.i30, %tobool.i.i29 +; IR-NEXT: br i1 %or.cond.i.i31, label %if.then.i.i32, label %polybench_alloc_data.exit33 +; +; IR: if.then.i.i32: ; preds = %polybench_alloc_data.exit +; IR-NEXT: %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; IR-NEXT: %7 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %6) #9 +; IR-NEXT: call void @exit(i32 1) #10 +; IR-NEXT: unreachable +; +; IR: polybench_alloc_data.exit33: ; preds = %polybench_alloc_data.exit +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %4) 
#7 +; IR-NEXT: %8 = bitcast i8** %new.i.i34 to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %8) #7 +; IR-NEXT: store i8* null, i8** %new.i.i34, align 8, !tbaa !5 +; IR-NEXT: %call.i.i35 = call i32 @posix_memalign(i8** nonnull %new.i.i34, i64 32, i64 8388608) #7 +; IR-NEXT: %9 = load i8*, i8** %new.i.i34, align 8, !tbaa !5 +; IR-NEXT: %tobool.i.i36 = icmp eq i8* %9, null +; IR-NEXT: %tobool1.i.i37 = icmp ne i32 %call.i.i35, 0 +; IR-NEXT: %or.cond.i.i38 = or i1 %tobool1.i.i37, %tobool.i.i36 +; IR-NEXT: br i1 %or.cond.i.i38, label %if.then.i.i39, label %polybench_alloc_data.exit40 +; +; IR: if.then.i.i39: ; preds = %polybench_alloc_data.exit33 +; IR-NEXT: %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; IR-NEXT: %11 = call i64 @fwrite(i8* getelementptr inbounds ([51 x i8], [51 x i8]* @.str.1, i64 0, i64 0), i64 50, i64 1, %struct._IO_FILE* %10) #9 +; IR-NEXT: call void @exit(i32 1) #10 +; IR-NEXT: unreachable +; +; IR: polybench_alloc_data.exit40: ; preds = %polybench_alloc_data.exit33 +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %8) #7 +; IR-NEXT: %arraydecay = bitcast i8* %1 to [1024 x double]* +; IR-NEXT: %arraydecay3 = bitcast i8* %5 to [1024 x double]* +; IR-NEXT: br label %polly.split_new_and_old +; +; IR: polly.split_new_and_old: ; preds = %polybench_alloc_data.exit40 +; IR-NEXT: %polly.access.cast. = bitcast i8* %9 to double* +; IR-NEXT: %polly.access.mul. = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit = extractvalue { i64, i1 } %polly.access.mul., 1 +; IR-NEXT: %polly.overflow.state = or i1 false, %polly.access.mul..obit +; IR-NEXT: %polly.access.mul..res = extractvalue { i64, i1 } %polly.access.mul., 0 +; IR-NEXT: %polly.access.add. 
= call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res, i64 1024) +; IR-NEXT: %polly.access.add..obit = extractvalue { i64, i1 } %polly.access.add., 1 +; IR-NEXT: %polly.overflow.state1 = or i1 %polly.overflow.state, %polly.access.add..obit +; IR-NEXT: %polly.access.add..res = extractvalue { i64, i1 } %polly.access.add., 0 +; IR-NEXT: %polly.access. = getelementptr double, double* %polly.access.cast., i64 %polly.access.add..res +; IR-NEXT: %polly.access.cast.2 = bitcast i8* %5 to double* +; IR-NEXT: %polly.access.mul.3 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit4 = extractvalue { i64, i1 } %polly.access.mul.3, 1 +; IR-NEXT: %polly.overflow.state5 = or i1 %polly.overflow.state1, %polly.access.mul..obit4 +; IR-NEXT: %polly.access.mul..res6 = extractvalue { i64, i1 } %polly.access.mul.3, 0 +; IR-NEXT: %polly.access.add.7 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res6, i64 0) +; IR-NEXT: %polly.access.add..obit8 = extractvalue { i64, i1 } %polly.access.add.7, 1 +; IR-NEXT: %polly.overflow.state9 = or i1 %polly.overflow.state5, %polly.access.add..obit8 +; IR-NEXT: %polly.access.add..res10 = extractvalue { i64, i1 } %polly.access.add.7, 0 +; IR-NEXT: %polly.access.11 = getelementptr double, double* %polly.access.cast.2, i64 %polly.access.add..res10 +; IR-NEXT: %12 = ptrtoint double* %polly.access. 
to i64 +; IR-NEXT: %13 = ptrtoint double* %polly.access.11 to i64 +; IR-NEXT: %14 = icmp ule i64 %12, %13 +; IR-NEXT: %polly.access.cast.12 = bitcast i8* %5 to double* +; IR-NEXT: %polly.access.mul.13 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit14 = extractvalue { i64, i1 } %polly.access.mul.13, 1 +; IR-NEXT: %polly.overflow.state15 = or i1 %polly.overflow.state9, %polly.access.mul..obit14 +; IR-NEXT: %polly.access.mul..res16 = extractvalue { i64, i1 } %polly.access.mul.13, 0 +; IR-NEXT: %polly.access.add.17 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res16, i64 1024) +; IR-NEXT: %polly.access.add..obit18 = extractvalue { i64, i1 } %polly.access.add.17, 1 +; IR-NEXT: %polly.overflow.state19 = or i1 %polly.overflow.state15, %polly.access.add..obit18 +; IR-NEXT: %polly.access.add..res20 = extractvalue { i64, i1 } %polly.access.add.17, 0 +; IR-NEXT: %polly.access.21 = getelementptr double, double* %polly.access.cast.12, i64 %polly.access.add..res20 +; IR-NEXT: %polly.access.cast.22 = bitcast i8* %9 to double* +; IR-NEXT: %polly.access.mul.23 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit24 = extractvalue { i64, i1 } %polly.access.mul.23, 1 +; IR-NEXT: %polly.overflow.state25 = or i1 %polly.overflow.state19, %polly.access.mul..obit24 +; IR-NEXT: %polly.access.mul..res26 = extractvalue { i64, i1 } %polly.access.mul.23, 0 +; IR-NEXT: %polly.access.add.27 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res26, i64 0) +; IR-NEXT: %polly.access.add..obit28 = extractvalue { i64, i1 } %polly.access.add.27, 1 +; IR-NEXT: %polly.overflow.state29 = or i1 %polly.overflow.state25, %polly.access.add..obit28 +; IR-NEXT: %polly.access.add..res30 = extractvalue { i64, i1 } %polly.access.add.27, 0 +; IR-NEXT: %polly.access.31 = getelementptr double, double* %polly.access.cast.22, i64 %polly.access.add..res30 +; IR-NEXT: %15 = 
ptrtoint double* %polly.access.21 to i64 +; IR-NEXT: %16 = ptrtoint double* %polly.access.31 to i64 +; IR-NEXT: %17 = icmp ule i64 %15, %16 +; IR-NEXT: %18 = or i1 %14, %17 +; IR-NEXT: %19 = and i1 true, %18 +; IR-NEXT: %polly.access.cast.32 = bitcast i8* %1 to double* +; IR-NEXT: %polly.access.mul.33 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit34 = extractvalue { i64, i1 } %polly.access.mul.33, 1 +; IR-NEXT: %polly.overflow.state35 = or i1 %polly.overflow.state29, %polly.access.mul..obit34 +; IR-NEXT: %polly.access.mul..res36 = extractvalue { i64, i1 } %polly.access.mul.33, 0 +; IR-NEXT: %polly.access.add.37 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res36, i64 1024) +; IR-NEXT: %polly.access.add..obit38 = extractvalue { i64, i1 } %polly.access.add.37, 1 +; IR-NEXT: %polly.overflow.state39 = or i1 %polly.overflow.state35, %polly.access.add..obit38 +; IR-NEXT: %polly.access.add..res40 = extractvalue { i64, i1 } %polly.access.add.37, 0 +; IR-NEXT: %polly.access.41 = getelementptr double, double* %polly.access.cast.32, i64 %polly.access.add..res40 +; IR-NEXT: %polly.access.cast.42 = bitcast i8* %5 to double* +; IR-NEXT: %polly.access.mul.43 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit44 = extractvalue { i64, i1 } %polly.access.mul.43, 1 +; IR-NEXT: %polly.overflow.state45 = or i1 %polly.overflow.state39, %polly.access.mul..obit44 +; IR-NEXT: %polly.access.mul..res46 = extractvalue { i64, i1 } %polly.access.mul.43, 0 +; IR-NEXT: %polly.access.add.47 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res46, i64 0) +; IR-NEXT: %polly.access.add..obit48 = extractvalue { i64, i1 } %polly.access.add.47, 1 +; IR-NEXT: %polly.overflow.state49 = or i1 %polly.overflow.state45, %polly.access.add..obit48 +; IR-NEXT: %polly.access.add..res50 = extractvalue { i64, i1 } %polly.access.add.47, 0 +; IR-NEXT: %polly.access.51 
= getelementptr double, double* %polly.access.cast.42, i64 %polly.access.add..res50 +; IR-NEXT: %20 = ptrtoint double* %polly.access.41 to i64 +; IR-NEXT: %21 = ptrtoint double* %polly.access.51 to i64 +; IR-NEXT: %22 = icmp ule i64 %20, %21 +; IR-NEXT: %polly.access.cast.52 = bitcast i8* %5 to double* +; IR-NEXT: %polly.access.mul.53 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit54 = extractvalue { i64, i1 } %polly.access.mul.53, 1 +; IR-NEXT: %polly.overflow.state55 = or i1 %polly.overflow.state49, %polly.access.mul..obit54 +; IR-NEXT: %polly.access.mul..res56 = extractvalue { i64, i1 } %polly.access.mul.53, 0 +; IR-NEXT: %polly.access.add.57 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res56, i64 1024) +; IR-NEXT: %polly.access.add..obit58 = extractvalue { i64, i1 } %polly.access.add.57, 1 +; IR-NEXT: %polly.overflow.state59 = or i1 %polly.overflow.state55, %polly.access.add..obit58 +; IR-NEXT: %polly.access.add..res60 = extractvalue { i64, i1 } %polly.access.add.57, 0 +; IR-NEXT: %polly.access.61 = getelementptr double, double* %polly.access.cast.52, i64 %polly.access.add..res60 +; IR-NEXT: %polly.access.cast.62 = bitcast i8* %1 to double* +; IR-NEXT: %polly.access.mul.63 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit64 = extractvalue { i64, i1 } %polly.access.mul.63, 1 +; IR-NEXT: %polly.overflow.state65 = or i1 %polly.overflow.state59, %polly.access.mul..obit64 +; IR-NEXT: %polly.access.mul..res66 = extractvalue { i64, i1 } %polly.access.mul.63, 0 +; IR-NEXT: %polly.access.add.67 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res66, i64 0) +; IR-NEXT: %polly.access.add..obit68 = extractvalue { i64, i1 } %polly.access.add.67, 1 +; IR-NEXT: %polly.overflow.state69 = or i1 %polly.overflow.state65, %polly.access.add..obit68 +; IR-NEXT: %polly.access.add..res70 = extractvalue { i64, i1 } 
%polly.access.add.67, 0 +; IR-NEXT: %polly.access.71 = getelementptr double, double* %polly.access.cast.62, i64 %polly.access.add..res70 +; IR-NEXT: %23 = ptrtoint double* %polly.access.61 to i64 +; IR-NEXT: %24 = ptrtoint double* %polly.access.71 to i64 +; IR-NEXT: %25 = icmp ule i64 %23, %24 +; IR-NEXT: %26 = or i1 %22, %25 +; IR-NEXT: %27 = and i1 %19, %26 +; IR-NEXT: %polly.access.cast.72 = bitcast i8* %1 to double* +; IR-NEXT: %polly.access.mul.73 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit74 = extractvalue { i64, i1 } %polly.access.mul.73, 1 +; IR-NEXT: %polly.overflow.state75 = or i1 %polly.overflow.state69, %polly.access.mul..obit74 +; IR-NEXT: %polly.access.mul..res76 = extractvalue { i64, i1 } %polly.access.mul.73, 0 +; IR-NEXT: %polly.access.add.77 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res76, i64 1024) +; IR-NEXT: %polly.access.add..obit78 = extractvalue { i64, i1 } %polly.access.add.77, 1 +; IR-NEXT: %polly.overflow.state79 = or i1 %polly.overflow.state75, %polly.access.add..obit78 +; IR-NEXT: %polly.access.add..res80 = extractvalue { i64, i1 } %polly.access.add.77, 0 +; IR-NEXT: %polly.access.81 = getelementptr double, double* %polly.access.cast.72, i64 %polly.access.add..res80 +; IR-NEXT: %polly.access.cast.82 = bitcast i8* %9 to double* +; IR-NEXT: %polly.access.mul.83 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit84 = extractvalue { i64, i1 } %polly.access.mul.83, 1 +; IR-NEXT: %polly.overflow.state85 = or i1 %polly.overflow.state79, %polly.access.mul..obit84 +; IR-NEXT: %polly.access.mul..res86 = extractvalue { i64, i1 } %polly.access.mul.83, 0 +; IR-NEXT: %polly.access.add.87 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res86, i64 0) +; IR-NEXT: %polly.access.add..obit88 = extractvalue { i64, i1 } %polly.access.add.87, 1 +; IR-NEXT: %polly.overflow.state89 = or i1 
%polly.overflow.state85, %polly.access.add..obit88 +; IR-NEXT: %polly.access.add..res90 = extractvalue { i64, i1 } %polly.access.add.87, 0 +; IR-NEXT: %polly.access.91 = getelementptr double, double* %polly.access.cast.82, i64 %polly.access.add..res90 +; IR-NEXT: %28 = ptrtoint double* %polly.access.81 to i64 +; IR-NEXT: %29 = ptrtoint double* %polly.access.91 to i64 +; IR-NEXT: %30 = icmp ule i64 %28, %29 +; IR-NEXT: %polly.access.cast.92 = bitcast i8* %9 to double* +; IR-NEXT: %polly.access.mul.93 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1023, i64 1024) +; IR-NEXT: %polly.access.mul..obit94 = extractvalue { i64, i1 } %polly.access.mul.93, 1 +; IR-NEXT: %polly.overflow.state95 = or i1 %polly.overflow.state89, %polly.access.mul..obit94 +; IR-NEXT: %polly.access.mul..res96 = extractvalue { i64, i1 } %polly.access.mul.93, 0 +; IR-NEXT: %polly.access.add.97 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res96, i64 1024) +; IR-NEXT: %polly.access.add..obit98 = extractvalue { i64, i1 } %polly.access.add.97, 1 +; IR-NEXT: %polly.overflow.state99 = or i1 %polly.overflow.state95, %polly.access.add..obit98 +; IR-NEXT: %polly.access.add..res100 = extractvalue { i64, i1 } %polly.access.add.97, 0 +; IR-NEXT: %polly.access.101 = getelementptr double, double* %polly.access.cast.92, i64 %polly.access.add..res100 +; IR-NEXT: %polly.access.cast.102 = bitcast i8* %1 to double* +; IR-NEXT: %polly.access.mul.103 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 1024) +; IR-NEXT: %polly.access.mul..obit104 = extractvalue { i64, i1 } %polly.access.mul.103, 1 +; IR-NEXT: %polly.overflow.state105 = or i1 %polly.overflow.state99, %polly.access.mul..obit104 +; IR-NEXT: %polly.access.mul..res106 = extractvalue { i64, i1 } %polly.access.mul.103, 0 +; IR-NEXT: %polly.access.add.107 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res106, i64 0) +; IR-NEXT: %polly.access.add..obit108 = extractvalue { i64, i1 } 
%polly.access.add.107, 1 +; IR-NEXT: %polly.overflow.state109 = or i1 %polly.overflow.state105, %polly.access.add..obit108 +; IR-NEXT: %polly.access.add..res110 = extractvalue { i64, i1 } %polly.access.add.107, 0 +; IR-NEXT: %polly.access.111 = getelementptr double, double* %polly.access.cast.102, i64 %polly.access.add..res110 +; IR-NEXT: %31 = ptrtoint double* %polly.access.101 to i64 +; IR-NEXT: %32 = ptrtoint double* %polly.access.111 to i64 +; IR-NEXT: %33 = icmp ule i64 %31, %32 +; IR-NEXT: %34 = or i1 %30, %33 +; IR-NEXT: %35 = and i1 %27, %34 +; IR-NEXT: %polly.rtc.overflown = xor i1 %polly.overflow.state109, true +; IR-NEXT: %polly.rtc.result = and i1 %35, %polly.rtc.overflown +; IR-NEXT: br i1 %polly.rtc.result, label %polly.start, label %for.cond1.preheader.i +; +; IR: for.cond1.preheader.i: ; preds = %polly.split_new_and_old, %for.inc8.i +; IR-NEXT: %indvars.iv19.i = phi i64 [ %indvars.iv.next20.i, %for.inc8.i ], [ 0, %polly.split_new_and_old ] +; IR-NEXT: %36 = trunc i64 %indvars.iv19.i to i32 +; IR-NEXT: %conv.i = sitofp i32 %36 to double +; IR-NEXT: br label %for.body3.i +; +; IR: for.body3.i: ; preds = %for.body3.i, %for.cond1.preheader.i +; IR-NEXT: %indvars.iv16.i = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next17.i, %for.body3.i ] +; IR-NEXT: %37 = trunc i64 %indvars.iv16.i to i32 +; IR-NEXT: %conv4.i = sitofp i32 %37 to double +; IR-NEXT: %mul.i = fmul double %conv.i, %conv4.i +; IR-NEXT: %div.i = fmul double %mul.i, 9.765625e-04 +; IR-NEXT: %arrayidx7.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv19.i, i64 %indvars.iv16.i +; IR-NEXT: store double %div.i, double* %arrayidx7.i, align 8, !tbaa !1 +; IR-NEXT: %indvars.iv.next17.i = add nuw nsw i64 %indvars.iv16.i, 1 +; IR-NEXT: %exitcond18.i = icmp eq i64 %indvars.iv.next17.i, 1024 +; IR-NEXT: br i1 %exitcond18.i, label %for.inc8.i, label %for.body3.i +; +; IR: for.inc8.i: ; preds = %for.body3.i +; IR-NEXT: %indvars.iv.next20.i = add nuw nsw i64 
%indvars.iv19.i, 1 +; IR-NEXT: %exitcond21.i = icmp eq i64 %indvars.iv.next20.i, 1024 +; IR-NEXT: br i1 %exitcond21.i, label %for.cond15.preheader.i.preheader, label %for.cond1.preheader.i +; +; IR: for.cond15.preheader.i.preheader: ; preds = %for.inc8.i +; IR-NEXT: %arraydecay4 = bitcast i8* %9 to [1024 x double]* +; IR-NEXT: br label %for.cond15.preheader.i +; +; IR: for.cond15.preheader.i: ; preds = %for.inc31.i, %for.cond15.preheader.i.preheader +; IR-NEXT: %indvars.iv13.i = phi i64 [ %indvars.iv.next14.i, %for.inc31.i ], [ 0, %for.cond15.preheader.i.preheader ] +; IR-NEXT: %38 = trunc i64 %indvars.iv13.i to i32 +; IR-NEXT: %conv19.i = sitofp i32 %38 to double +; IR-NEXT: br label %for.body18.i +; +; IR: for.body18.i: ; preds = %for.body18.i, %for.cond15.preheader.i +; IR-NEXT: %indvars.iv10.i = phi i64 [ 0, %for.cond15.preheader.i ], [ %indvars.iv.next11.i, %for.body18.i ] +; IR-NEXT: %39 = trunc i64 %indvars.iv10.i to i32 +; IR-NEXT: %conv20.i = sitofp i32 %39 to double +; IR-NEXT: %mul21.i = fmul double %conv19.i, %conv20.i +; IR-NEXT: %div23.i = fmul double %mul21.i, 9.765625e-04 +; IR-NEXT: %arrayidx27.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv13.i, i64 %indvars.iv10.i +; IR-NEXT: store double %div23.i, double* %arrayidx27.i, align 8, !tbaa !1 +; IR-NEXT: %indvars.iv.next11.i = add nuw nsw i64 %indvars.iv10.i, 1 +; IR-NEXT: %exitcond12.i = icmp eq i64 %indvars.iv.next11.i, 1024 +; IR-NEXT: br i1 %exitcond12.i, label %for.inc31.i, label %for.body18.i +; +; IR: for.inc31.i: ; preds = %for.body18.i +; IR-NEXT: %indvars.iv.next14.i = add nuw nsw i64 %indvars.iv13.i, 1 +; IR-NEXT: %exitcond15.i = icmp eq i64 %indvars.iv.next14.i, 1024 +; IR-NEXT: br i1 %exitcond15.i, label %for.cond38.preheader.i, label %for.cond15.preheader.i +; +; IR: for.cond38.preheader.i: ; preds = %for.inc54.i, %for.inc31.i +; IR-NEXT: %indvars.iv7.i = phi i64 [ %indvars.iv.next8.i, %for.inc54.i ], [ 0, %for.inc31.i ] +; IR-NEXT: %40 = trunc 
i64 %indvars.iv7.i to i32 +; IR-NEXT: %conv42.i = sitofp i32 %40 to double +; IR-NEXT: br label %for.body41.i +; +; IR: for.body41.i: ; preds = %for.body41.i, %for.cond38.preheader.i +; IR-NEXT: %indvars.iv.i = phi i64 [ 0, %for.cond38.preheader.i ], [ %indvars.iv.next.i, %for.body41.i ] +; IR-NEXT: %41 = trunc i64 %indvars.iv.i to i32 +; IR-NEXT: %conv43.i = sitofp i32 %41 to double +; IR-NEXT: %mul44.i = fmul double %conv42.i, %conv43.i +; IR-NEXT: %div46.i = fmul double %mul44.i, 9.765625e-04 +; IR-NEXT: %arrayidx50.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv7.i, i64 %indvars.iv.i +; IR-NEXT: store double %div46.i, double* %arrayidx50.i, align 8, !tbaa !1 +; IR-NEXT: %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 +; IR-NEXT: %exitcond.i = icmp eq i64 %indvars.iv.next.i, 1024 +; IR-NEXT: br i1 %exitcond.i, label %for.inc54.i, label %for.body41.i +; +; IR: for.inc54.i: ; preds = %for.body41.i +; IR-NEXT: %indvars.iv.next8.i = add nuw nsw i64 %indvars.iv7.i, 1 +; IR-NEXT: %exitcond9.i = icmp eq i64 %indvars.iv.next8.i, 1024 +; IR-NEXT: br i1 %exitcond9.i, label %for.cond1.preheader.i42, label %for.cond38.preheader.i +; +; IR: for.cond1.preheader.i42: ; preds = %for.inc26.i, %for.inc54.i +; IR-NEXT: %indvars.iv7.i41 = phi i64 [ %indvars.iv.next8.i48, %for.inc26.i ], [ 0, %for.inc54.i ] +; IR-NEXT: br label %for.body3.i44 +; +; IR: for.body3.i44: ; preds = %for.inc23.i, %for.cond1.preheader.i42 +; IR-NEXT: %indvars.iv4.i = phi i64 [ 0, %for.cond1.preheader.i42 ], [ %indvars.iv.next5.i, %for.inc23.i ] +; IR-NEXT: %arrayidx5.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv7.i41, i64 %indvars.iv4.i +; IR-NEXT: %42 = load double, double* %arrayidx5.i, align 8, !tbaa !1 +; IR-NEXT: %mul.i43 = fmul double %42, 2.123000e+03 +; IR-NEXT: store double %mul.i43, double* %arrayidx5.i, align 8, !tbaa !1 +; IR-NEXT: br label %for.body8.i +; +; IR: for.body8.i: ; preds = %for.body8.i, 
%for.body3.i44 +; IR-NEXT: %43 = phi double [ %mul.i43, %for.body3.i44 ], [ %add.i, %for.body8.i ] +; IR-NEXT: %indvars.iv.i45 = phi i64 [ 0, %for.body3.i44 ], [ %indvars.iv.next.i46, %for.body8.i ] +; IR-NEXT: %arrayidx12.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay3, i64 %indvars.iv7.i41, i64 %indvars.iv.i45 +; IR-NEXT: %44 = load double, double* %arrayidx12.i, align 8, !tbaa !1 +; IR-NEXT: %mul13.i = fmul double %44, 3.241200e+04 +; IR-NEXT: %arrayidx17.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay4, i64 %indvars.iv.i45, i64 %indvars.iv4.i +; IR-NEXT: %45 = load double, double* %arrayidx17.i, align 8, !tbaa !1 +; IR-NEXT: %mul18.i = fmul double %mul13.i, %45 +; IR-NEXT: %add.i = fadd double %43, %mul18.i +; IR-NEXT: store double %add.i, double* %arrayidx5.i, align 8, !tbaa !1 +; IR-NEXT: %indvars.iv.next.i46 = add nuw nsw i64 %indvars.iv.i45, 1 +; IR-NEXT: %exitcond.i47 = icmp eq i64 %indvars.iv.next.i46, 1024 +; IR-NEXT: br i1 %exitcond.i47, label %for.inc23.i, label %for.body8.i +; +; IR: for.inc23.i: ; preds = %for.body8.i +; IR-NEXT: %indvars.iv.next5.i = add nuw nsw i64 %indvars.iv4.i, 1 +; IR-NEXT: %exitcond6.i = icmp eq i64 %indvars.iv.next5.i, 1024 +; IR-NEXT: br i1 %exitcond6.i, label %for.inc26.i, label %for.body3.i44 +; +; IR: for.inc26.i: ; preds = %for.inc23.i +; IR-NEXT: %indvars.iv.next8.i48 = add nuw nsw i64 %indvars.iv7.i41, 1 +; IR-NEXT: %exitcond9.i49 = icmp eq i64 %indvars.iv.next8.i48, 1024 +; IR-NEXT: br i1 %exitcond9.i49, label %polly.merge_new_and_old, label %for.cond1.preheader.i42 +; +; IR: polly.merge_new_and_old: ; preds = %polly.exiting, %for.inc26.i +; IR-NEXT: br label %kernel_gemm.exit +; +; IR: kernel_gemm.exit: ; preds = %polly.merge_new_and_old +; IR-NEXT: %call.i = call noalias i8* @malloc(i64 16385) #7 +; IR-NEXT: %arrayidx.i = getelementptr inbounds i8, i8* %call.i, i64 16384 +; IR-NEXT: store i8 0, i8* %arrayidx.i, align 1, !tbaa !7 +; IR-NEXT: %scevgep247 = 
getelementptr i8, i8* %call.i, i64 1 +; IR-NEXT: %scevgep249 = getelementptr i8, i8* %call.i, i64 2 +; IR-NEXT: %scevgep251 = getelementptr i8, i8* %call.i, i64 3 +; IR-NEXT: %scevgep253 = getelementptr i8, i8* %call.i, i64 4 +; IR-NEXT: %scevgep255 = getelementptr i8, i8* %call.i, i64 5 +; IR-NEXT: %scevgep257 = getelementptr i8, i8* %call.i, i64 6 +; IR-NEXT: %scevgep259 = getelementptr i8, i8* %call.i, i64 7 +; IR-NEXT: %scevgep261 = getelementptr i8, i8* %call.i, i64 8 +; IR-NEXT: %scevgep263 = getelementptr i8, i8* %call.i, i64 9 +; IR-NEXT: %scevgep265 = getelementptr i8, i8* %call.i, i64 10 +; IR-NEXT: %scevgep267 = getelementptr i8, i8* %call.i, i64 11 +; IR-NEXT: %scevgep269 = getelementptr i8, i8* %call.i, i64 12 +; IR-NEXT: %scevgep271 = getelementptr i8, i8* %call.i, i64 13 +; IR-NEXT: %scevgep273 = getelementptr i8, i8* %call.i, i64 14 +; IR-NEXT: %scevgep275 = getelementptr i8, i8* %call.i, i64 15 +; IR-NEXT: br label %for.cond3.preheader.i +; +; IR: for.cond3.preheader.i: ; preds = %for.end.i, %kernel_gemm.exit +; IR-NEXT: %indvars.iv4.i50 = phi i64 [ 0, %kernel_gemm.exit ], [ %indvars.iv.next5.i54, %for.end.i ] +; IR-NEXT: %46 = shl i64 %indvars.iv4.i50, 0 +; IR-NEXT: %47 = add i64 %46, 0 +; IR-NEXT: br label %polly.split_new_and_old232 +; +; IR: polly.split_new_and_old232: ; preds = %for.cond3.preheader.i +; IR-NEXT: br i1 true, label %polly.start234, label %for.body6.i +; +; IR: for.body6.i: ; preds = %polly.split_new_and_old232, %for.body6.i +; IR-NEXT: %indvars.iv.i51 = phi i64 [ %indvars.iv.next.i52, %for.body6.i ], [ 0, %polly.split_new_and_old232 ] +; IR-NEXT: %arrayidx10.i = getelementptr inbounds [1024 x double], [1024 x double]* %arraydecay, i64 %indvars.iv4.i50, i64 %indvars.iv.i51 +; IR-NEXT: %48 = bitcast double* %arrayidx10.i to i64* +; IR-NEXT: %49 = load i64, i64* %48, align 8, !tbaa !1 +; IR-NEXT: %50 = shl nsw i64 %indvars.iv.i51, 4 +; IR-NEXT: %block.sroa.0.0.extract.trunc138.i.i = trunc i64 %49 to i8 +; IR-NEXT: %and.i.i = and i8 
%block.sroa.0.0.extract.trunc138.i.i, 15 +; IR-NEXT: %add.i.i = or i8 %and.i.i, 48 +; IR-NEXT: %add.ptr.i.i = getelementptr inbounds i8, i8* %call.i, i64 %50 +; IR-NEXT: store i8 %add.i.i, i8* %add.ptr.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr10.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 1 +; IR-NEXT: store i8 %add.i.i, i8* %add.ptr10.i.i, align 1, !tbaa !7 +; IR-NEXT: %block.sroa.0.1.extract.shift.i.i = lshr i64 %49, 8 +; IR-NEXT: %conv13195.i.i = trunc i64 %block.sroa.0.1.extract.shift.i.i to i8 +; IR-NEXT: %and14.i.i = and i8 %conv13195.i.i, 15 +; IR-NEXT: %add15.i.i = or i8 %and14.i.i, 48 +; IR-NEXT: %add.ptr19.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 2 +; IR-NEXT: store i8 %add15.i.i, i8* %add.ptr19.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr28.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 3 +; IR-NEXT: store i8 %add15.i.i, i8* %add.ptr28.i.i, align 1, !tbaa !7 +; IR-NEXT: %block.sroa.0.2.extract.shift.i.i = lshr i64 %49, 16 +; IR-NEXT: %conv31201.i.i = trunc i64 %block.sroa.0.2.extract.shift.i.i to i8 +; IR-NEXT: %and32.i.i = and i8 %conv31201.i.i, 15 +; IR-NEXT: %add33.i.i = or i8 %and32.i.i, 48 +; IR-NEXT: %add.ptr37.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 4 +; IR-NEXT: store i8 %add33.i.i, i8* %add.ptr37.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr46.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 5 +; IR-NEXT: store i8 %add33.i.i, i8* %add.ptr46.i.i, align 1, !tbaa !7 +; IR-NEXT: %.tr.i.i = trunc i64 %49 to i32 +; IR-NEXT: %sext204207.i.i = lshr i32 %.tr.i.i, 24 +; IR-NEXT: %and50.i.i = and i32 %sext204207.i.i, 15 +; IR-NEXT: %add51.i.i = or i32 %and50.i.i, 48 +; IR-NEXT: %conv52.i.i = trunc i32 %add51.i.i to i8 +; IR-NEXT: %add.ptr55.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 6 +; IR-NEXT: store i8 %conv52.i.i, i8* %add.ptr55.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr64.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 7 +; IR-NEXT: store i8 %conv52.i.i, i8* %add.ptr64.i.i, align 
1, !tbaa !7 +; IR-NEXT: %block.sroa.0.4.extract.shift.i.i = lshr i64 %49, 32 +; IR-NEXT: %conv67211.i.i = trunc i64 %block.sroa.0.4.extract.shift.i.i to i8 +; IR-NEXT: %and68.i.i = and i8 %conv67211.i.i, 15 +; IR-NEXT: %add69.i.i = or i8 %and68.i.i, 48 +; IR-NEXT: %add.ptr73.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 8 +; IR-NEXT: store i8 %add69.i.i, i8* %add.ptr73.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr82.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 9 +; IR-NEXT: store i8 %add69.i.i, i8* %add.ptr82.i.i, align 1, !tbaa !7 +; IR-NEXT: %block.sroa.0.5.extract.shift.i.i = lshr i64 %49, 40 +; IR-NEXT: %conv85217.i.i = trunc i64 %block.sroa.0.5.extract.shift.i.i to i8 +; IR-NEXT: %and86.i.i = and i8 %conv85217.i.i, 15 +; IR-NEXT: %add87.i.i = or i8 %and86.i.i, 48 +; IR-NEXT: %add.ptr91.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 10 +; IR-NEXT: store i8 %add87.i.i, i8* %add.ptr91.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr100.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 11 +; IR-NEXT: store i8 %add87.i.i, i8* %add.ptr100.i.i, align 1, !tbaa !7 +; IR-NEXT: %block.sroa.0.6.extract.shift.i.i = lshr i64 %49, 48 +; IR-NEXT: %conv103223.i.i = trunc i64 %block.sroa.0.6.extract.shift.i.i to i8 +; IR-NEXT: %and104.i.i = and i8 %conv103223.i.i, 15 +; IR-NEXT: %add105.i.i = or i8 %and104.i.i, 48 +; IR-NEXT: %add.ptr109.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 12 +; IR-NEXT: store i8 %add105.i.i, i8* %add.ptr109.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr118.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 13 +; IR-NEXT: store i8 %add105.i.i, i8* %add.ptr118.i.i, align 1, !tbaa !7 +; IR-NEXT: %block.sroa.0.7.extract.shift.i.i = lshr i64 %49, 56 +; IR-NEXT: %conv121229.i.i = trunc i64 %block.sroa.0.7.extract.shift.i.i to i8 +; IR-NEXT: %and122.i.i = and i8 %conv121229.i.i, 15 +; IR-NEXT: %add123.i.i = or i8 %and122.i.i, 48 +; IR-NEXT: %add.ptr127.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 14 +; IR-NEXT: 
store i8 %add123.i.i, i8* %add.ptr127.i.i, align 1, !tbaa !7 +; IR-NEXT: %add.ptr136.i.i = getelementptr inbounds i8, i8* %add.ptr.i.i, i64 15 +; IR-NEXT: store i8 %add123.i.i, i8* %add.ptr136.i.i, align 1, !tbaa !7 +; IR-NEXT: %indvars.iv.next.i52 = add nuw nsw i64 %indvars.iv.i51, 1 +; IR-NEXT: %exitcond.i53 = icmp eq i64 %indvars.iv.next.i52, 1024 +; IR-NEXT: br i1 %exitcond.i53, label %polly.merge_new_and_old233, label %for.body6.i +; +; IR: polly.merge_new_and_old233: ; preds = %polly.exiting235, %for.body6.i +; IR-NEXT: br label %for.end.i +; +; IR: for.end.i: ; preds = %polly.merge_new_and_old233 +; IR-NEXT: %51 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8, !tbaa !5 +; IR-NEXT: %call12.i = call i32 @fputs(i8* nonnull %call.i, %struct._IO_FILE* %51) #9 +; IR-NEXT: %indvars.iv.next5.i54 = add nuw nsw i64 %indvars.iv4.i50, 1 +; IR-NEXT: %exitcond6.i55 = icmp eq i64 %indvars.iv.next5.i54, 1024 +; IR-NEXT: br i1 %exitcond6.i55, label %print_array.exit, label %for.cond3.preheader.i +; +; IR: print_array.exit: ; preds = %for.end.i +; IR-NEXT: call void @free(i8* nonnull %call.i) #7 +; IR-NEXT: call void @free(i8* nonnull %1) #7 +; IR-NEXT: call void @free(i8* %5) #7 +; IR-NEXT: call void @free(i8* %9) #7 +; IR-NEXT: ret i32 0 +; +; IR: polly.start: ; preds = %polly.split_new_and_old +; IR-NEXT: br label %polly.loop_preheader +; +; IR: polly.loop_exit: ; preds = %polly.loop_exit114 +; IR-NEXT: br label %polly.loop_preheader121 +; +; IR: polly.loop_exit122: ; preds = %polly.loop_exit128 +; IR-NEXT: br label %polly.loop_preheader136 +; +; IR: polly.loop_exit137: ; preds = %polly.loop_exit143 +; IR-NEXT: br label %polly.loop_preheader166 +; +; IR: polly.loop_exit167: ; preds = %polly.loop_exit173 +; IR-NEXT: br label %polly.loop_preheader181 +; +; IR: polly.loop_exit182: ; preds = %polly.loop_exit188 +; IR-NEXT: br label %polly.exiting +; +; IR: polly.exiting: ; preds = %polly.loop_exit182 +; IR-NEXT: br label %polly.merge_new_and_old +; +; IR: 
polly.loop_header: ; preds = %polly.loop_exit114, %polly.loop_preheader +; IR-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_exit114 ] +; IR-NEXT: br label %polly.stmt.for.cond38.preheader.i +; +; IR: polly.stmt.for.cond38.preheader.i: ; preds = %polly.loop_header +; IR-NEXT: %52 = trunc i64 %polly.indvar to i32 +; IR-NEXT: %p_conv42.i = sitofp i32 %52 to double +; IR-NEXT: store double %p_conv42.i, double* %conv42.i.s2a +; IR-NEXT: br label %polly.loop_preheader113 +; +; IR: polly.loop_exit114: ; preds = %polly.stmt.for.body41.i +; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 +; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit +; +; IR: polly.loop_preheader: ; preds = %polly.start +; IR-NEXT: br label %polly.loop_header +; +; IR: polly.loop_header112: ; preds = %polly.stmt.for.body41.i, %polly.loop_preheader113 +; IR-NEXT: %polly.indvar115 = phi i64 [ 0, %polly.loop_preheader113 ], [ %polly.indvar_next116, %polly.stmt.for.body41.i ] +; IR-NEXT: br label %polly.stmt.for.body41.i +; +; IR: polly.stmt.for.body41.i: ; preds = %polly.loop_header112 +; IR-NEXT: %conv42.i.s2a.reload = load double, double* %conv42.i.s2a +; IR-NEXT: %53 = trunc i64 %polly.indvar115 to i32 +; IR-NEXT: %p_conv43.i = sitofp i32 %53 to double +; IR-NEXT: %p_mul44.i = fmul double %conv42.i.s2a.reload, %p_conv43.i +; IR-NEXT: %p_div46.i = fmul double %p_mul44.i, 9.765625e-04 +; IR-NEXT: %54 = shl i64 %polly.indvar115, 3 +; IR-NEXT: %scevgep118 = getelementptr i8, i8* %scevgep, i64 %54 +; IR-NEXT: %scevgep118119 = bitcast i8* %scevgep118 to double* +; IR-NEXT: store double %p_div46.i, double* %scevgep118119, align 8, !alias.scope !8, !noalias !10 +; IR-NEXT: %polly.indvar_next116 = add nsw i64 %polly.indvar115, 1 +; IR-NEXT: %polly.loop_cond117 = icmp sle i64 %polly.indvar115, 1022 +; IR-NEXT: br i1 %polly.loop_cond117, label 
%polly.loop_header112, label %polly.loop_exit114 +; +; IR: polly.loop_preheader113: ; preds = %polly.stmt.for.cond38.preheader.i +; IR-NEXT: %55 = shl i64 %polly.indvar, 13 +; IR-NEXT: %scevgep = getelementptr i8, i8* %9, i64 %55 +; IR-NEXT: br label %polly.loop_header112 +; +; IR: polly.loop_header120: ; preds = %polly.loop_exit128, %polly.loop_preheader121 +; IR-NEXT: %polly.indvar123 = phi i64 [ 0, %polly.loop_preheader121 ], [ %polly.indvar_next124, %polly.loop_exit128 ] +; IR-NEXT: br label %polly.stmt.for.cond1.preheader.i +; +; IR: polly.stmt.for.cond1.preheader.i: ; preds = %polly.loop_header120 +; IR-NEXT: %56 = trunc i64 %polly.indvar123 to i32 +; IR-NEXT: %p_conv.i = sitofp i32 %56 to double +; IR-NEXT: store double %p_conv.i, double* %conv.i.s2a +; IR-NEXT: br label %polly.loop_preheader127 +; +; IR: polly.loop_exit128: ; preds = %polly.stmt.for.body3.i +; IR-NEXT: %polly.indvar_next124 = add nsw i64 %polly.indvar123, 1 +; IR-NEXT: %polly.loop_cond125 = icmp sle i64 %polly.indvar123, 1022 +; IR-NEXT: br i1 %polly.loop_cond125, label %polly.loop_header120, label %polly.loop_exit122 +; +; IR: polly.loop_preheader121: ; preds = %polly.loop_exit +; IR-NEXT: br label %polly.loop_header120 +; +; IR: polly.loop_header126: ; preds = %polly.stmt.for.body3.i, %polly.loop_preheader127 +; IR-NEXT: %polly.indvar129 = phi i64 [ 0, %polly.loop_preheader127 ], [ %polly.indvar_next130, %polly.stmt.for.body3.i ] +; IR-NEXT: br label %polly.stmt.for.body3.i +; +; IR: polly.stmt.for.body3.i: ; preds = %polly.loop_header126 +; IR-NEXT: %conv.i.s2a.reload = load double, double* %conv.i.s2a +; IR-NEXT: %57 = trunc i64 %polly.indvar129 to i32 +; IR-NEXT: %p_conv4.i = sitofp i32 %57 to double +; IR-NEXT: %p_mul.i = fmul double %conv.i.s2a.reload, %p_conv4.i +; IR-NEXT: %p_div.i = fmul double %p_mul.i, 9.765625e-04 +; IR-NEXT: %58 = shl i64 %polly.indvar129, 3 +; IR-NEXT: %scevgep133 = getelementptr i8, i8* %scevgep132, i64 %58 +; IR-NEXT: %scevgep133134 = bitcast i8* 
%scevgep133 to double* +; IR-NEXT: store double %p_div.i, double* %scevgep133134, align 8, !alias.scope !11, !noalias !17 +; IR-NEXT: %polly.indvar_next130 = add nsw i64 %polly.indvar129, 1 +; IR-NEXT: %polly.loop_cond131 = icmp sle i64 %polly.indvar129, 1022 +; IR-NEXT: br i1 %polly.loop_cond131, label %polly.loop_header126, label %polly.loop_exit128 +; +; IR: polly.loop_preheader127: ; preds = %polly.stmt.for.cond1.preheader.i +; IR-NEXT: %59 = shl i64 %polly.indvar123, 13 +; IR-NEXT: %scevgep132 = getelementptr i8, i8* %1, i64 %59 +; IR-NEXT: br label %polly.loop_header126 +; +; IR: polly.loop_header135: ; preds = %polly.loop_exit143, %polly.loop_preheader136 +; IR-NEXT: %polly.indvar138 = phi i64 [ 0, %polly.loop_preheader136 ], [ %polly.indvar_next139, %polly.loop_exit143 ] +; IR-NEXT: br label %polly.loop_preheader142 +; +; IR: polly.loop_exit143: ; preds = %polly.loop_exit149 +; IR-NEXT: %polly.indvar_next139 = add nsw i64 %polly.indvar138, 1 +; IR-NEXT: %polly.loop_cond140 = icmp sle i64 %polly.indvar138, 30 +; IR-NEXT: br i1 %polly.loop_cond140, label %polly.loop_header135, label %polly.loop_exit137 +; +; IR: polly.loop_preheader136: ; preds = %polly.loop_exit122 +; IR-NEXT: br label %polly.loop_header135 +; +; IR: polly.loop_header141: ; preds = %polly.loop_exit149, %polly.loop_preheader142 +; IR-NEXT: %polly.indvar144 = phi i64 [ 0, %polly.loop_preheader142 ], [ %polly.indvar_next145, %polly.loop_exit149 ] +; IR-NEXT: br label %polly.loop_preheader148 +; +; IR: polly.loop_exit149: ; preds = %polly.loop_exit155 +; IR-NEXT: %polly.indvar_next145 = add nsw i64 %polly.indvar144, 1 +; IR-NEXT: %polly.loop_cond146 = icmp sle i64 %polly.indvar144, 30 +; IR-NEXT: br i1 %polly.loop_cond146, label %polly.loop_header141, label %polly.loop_exit143 +; +; IR: polly.loop_preheader142: ; preds = %polly.loop_header135 +; IR-NEXT: br label %polly.loop_header141 +; +; IR: polly.loop_header147: ; preds = %polly.loop_exit155, %polly.loop_preheader148 +; IR-NEXT: 
%polly.indvar150 = phi i64 [ 0, %polly.loop_preheader148 ], [ %polly.indvar_next151, %polly.loop_exit155 ] +; IR-NEXT: br label %polly.loop_preheader154 +; +; IR: polly.loop_exit155: ; preds = %polly.stmt.for.body3.i44 +; IR-NEXT: %polly.indvar_next151 = add nsw i64 %polly.indvar150, 1 +; IR-NEXT: %polly.loop_cond152 = icmp sle i64 %polly.indvar150, 30 +; IR-NEXT: br i1 %polly.loop_cond152, label %polly.loop_header147, label %polly.loop_exit149 +; +; IR: polly.loop_preheader148: ; preds = %polly.loop_header141 +; IR-NEXT: br label %polly.loop_header147 +; +; IR: polly.loop_header153: ; preds = %polly.stmt.for.body3.i44, %polly.loop_preheader154 +; IR-NEXT: %polly.indvar156 = phi i64 [ 0, %polly.loop_preheader154 ], [ %polly.indvar_next157, %polly.stmt.for.body3.i44 ] +; IR-NEXT: %60 = mul nsw i64 32, %polly.indvar138 +; IR-NEXT: %61 = add nsw i64 %60, %polly.indvar150 +; IR-NEXT: %62 = mul nsw i64 32, %polly.indvar144 +; IR-NEXT: %63 = add nsw i64 %62, %polly.indvar156 +; IR-NEXT: br label %polly.stmt.for.body3.i44 +; +; IR: polly.stmt.for.body3.i44: ; preds = %polly.loop_header153 +; IR-NEXT: %64 = shl i64 %61, 13 +; IR-NEXT: %65 = shl i64 %63, 3 +; IR-NEXT: %66 = add i64 %64, %65 +; IR-NEXT: %scevgep159 = getelementptr i8, i8* %1, i64 %66 +; IR-NEXT: %scevgep159160 = bitcast i8* %scevgep159 to double* +; IR-NEXT: %_p_scalar_ = load double, double* %scevgep159160, align 8, !alias.scope !11, !noalias !17 +; IR-NEXT: %p_mul.i43 = fmul double %_p_scalar_, 2.123000e+03 +; IR-NEXT: store double %p_mul.i43, double* %scevgep159160, align 8, !alias.scope !11, !noalias !17 +; IR-NEXT: %polly.access.cast.161 = bitcast i8* %1 to double* +; IR-NEXT: %67 = mul nsw i64 32, %polly.indvar138 +; IR-NEXT: %68 = add nsw i64 %67, %polly.indvar150 +; IR-NEXT: %polly.access.mul.162 = mul nsw i64 %68, 1024 +; IR-NEXT: %69 = mul nsw i64 32, %polly.indvar144 +; IR-NEXT: %70 = add nsw i64 %69, %polly.indvar156 +; IR-NEXT: %polly.access.add.163 = add nsw i64 %polly.access.mul.162, %70 +; 
IR-NEXT: %polly.access.164 = getelementptr double, double* %polly.access.cast.161, i64 %polly.access.add.163 +; IR-NEXT: store double %p_mul.i43, double* %polly.access.164, !alias.scope !11, !noalias !17 +; IR-NEXT: %polly.indvar_next157 = add nsw i64 %polly.indvar156, 1 +; IR-NEXT: %polly.loop_cond158 = icmp sle i64 %polly.indvar156, 30 +; IR-NEXT: br i1 %polly.loop_cond158, label %polly.loop_header153, label %polly.loop_exit155 +; +; IR: polly.loop_preheader154: ; preds = %polly.loop_header147 +; IR-NEXT: br label %polly.loop_header153 +; +; IR: polly.loop_header165: ; preds = %polly.loop_exit173, %polly.loop_preheader166 +; IR-NEXT: %polly.indvar168 = phi i64 [ 0, %polly.loop_preheader166 ], [ %polly.indvar_next169, %polly.loop_exit173 ] +; IR-NEXT: br label %polly.stmt.for.cond15.preheader.i +; +; IR: polly.stmt.for.cond15.preheader.i: ; preds = %polly.loop_header165 +; IR-NEXT: %71 = trunc i64 %polly.indvar168 to i32 +; IR-NEXT: %p_conv19.i = sitofp i32 %71 to double +; IR-NEXT: store double %p_conv19.i, double* %conv19.i.s2a +; IR-NEXT: br label %polly.loop_preheader172 +; +; IR: polly.loop_exit173: ; preds = %polly.stmt.for.body18.i +; IR-NEXT: %polly.indvar_next169 = add nsw i64 %polly.indvar168, 1 +; IR-NEXT: %polly.loop_cond170 = icmp sle i64 %polly.indvar168, 1022 +; IR-NEXT: br i1 %polly.loop_cond170, label %polly.loop_header165, label %polly.loop_exit167 +; +; IR: polly.loop_preheader166: ; preds = %polly.loop_exit137 +; IR-NEXT: br label %polly.loop_header165 +; +; IR: polly.loop_header171: ; preds = %polly.stmt.for.body18.i, %polly.loop_preheader172 +; IR-NEXT: %polly.indvar174 = phi i64 [ 0, %polly.loop_preheader172 ], [ %polly.indvar_next175, %polly.stmt.for.body18.i ] +; IR-NEXT: br label %polly.stmt.for.body18.i +; +; IR: polly.stmt.for.body18.i: ; preds = %polly.loop_header171 +; IR-NEXT: %conv19.i.s2a.reload = load double, double* %conv19.i.s2a +; IR-NEXT: %72 = trunc i64 %polly.indvar174 to i32 +; IR-NEXT: %p_conv20.i = sitofp i32 %72 to 
double +; IR-NEXT: %p_mul21.i = fmul double %conv19.i.s2a.reload, %p_conv20.i +; IR-NEXT: %p_div23.i = fmul double %p_mul21.i, 9.765625e-04 +; IR-NEXT: %73 = shl i64 %polly.indvar174, 3 +; IR-NEXT: %scevgep178 = getelementptr i8, i8* %scevgep177, i64 %73 +; IR-NEXT: %scevgep178179 = bitcast i8* %scevgep178 to double* +; IR-NEXT: store double %p_div23.i, double* %scevgep178179, align 8, !alias.scope !16, !noalias !18 +; IR-NEXT: %polly.indvar_next175 = add nsw i64 %polly.indvar174, 1 +; IR-NEXT: %polly.loop_cond176 = icmp sle i64 %polly.indvar174, 1022 +; IR-NEXT: br i1 %polly.loop_cond176, label %polly.loop_header171, label %polly.loop_exit173 +; +; IR: polly.loop_preheader172: ; preds = %polly.stmt.for.cond15.preheader.i +; IR-NEXT: %74 = shl i64 %polly.indvar168, 13 +; IR-NEXT: %scevgep177 = getelementptr i8, i8* %5, i64 %74 +; IR-NEXT: br label %polly.loop_header171 +; +; IR: polly.loop_header180: ; preds = %polly.loop_exit188, %polly.loop_preheader181 +; IR-NEXT: %polly.indvar183 = phi i64 [ 0, %polly.loop_preheader181 ], [ %polly.indvar_next184, %polly.loop_exit188 ] +; IR-NEXT: br label %polly.loop_preheader187 +; +; IR: polly.loop_exit188: ; preds = %polly.loop_exit194 +; IR-NEXT: %polly.indvar_next184 = add nsw i64 %polly.indvar183, 1 +; IR-NEXT: %polly.loop_cond185 = icmp sle i64 %polly.indvar183, 30 +; IR-NEXT: br i1 %polly.loop_cond185, label %polly.loop_header180, label %polly.loop_exit182 +; +; IR: polly.loop_preheader181: ; preds = %polly.loop_exit167 +; IR-NEXT: br label %polly.loop_header180 +; +; IR: polly.loop_header186: ; preds = %polly.loop_exit194, %polly.loop_preheader187 +; IR-NEXT: %polly.indvar189 = phi i64 [ 0, %polly.loop_preheader187 ], [ %polly.indvar_next190, %polly.loop_exit194 ] +; IR-NEXT: br label %polly.loop_preheader193 +; +; IR: polly.loop_exit194: ; preds = %polly.loop_exit200 +; IR-NEXT: %polly.indvar_next190 = add nsw i64 %polly.indvar189, 1 +; IR-NEXT: %polly.loop_cond191 = icmp sle i64 %polly.indvar189, 30 +; IR-NEXT: br i1 
%polly.loop_cond191, label %polly.loop_header186, label %polly.loop_exit188 +; +; IR: polly.loop_preheader187: ; preds = %polly.loop_header180 +; IR-NEXT: br label %polly.loop_header186 +; +; IR: polly.loop_header192: ; preds = %polly.loop_exit200, %polly.loop_preheader193 +; IR-NEXT: %polly.indvar195 = phi i64 [ 0, %polly.loop_preheader193 ], [ %polly.indvar_next196, %polly.loop_exit200 ] +; IR-NEXT: br label %polly.loop_preheader199 +; +; IR: polly.loop_exit200: ; preds = %polly.loop_exit206 +; IR-NEXT: %polly.indvar_next196 = add nsw i64 %polly.indvar195, 1 +; IR-NEXT: %polly.loop_cond197 = icmp sle i64 %polly.indvar195, 30 +; IR-NEXT: br i1 %polly.loop_cond197, label %polly.loop_header192, label %polly.loop_exit194 +; +; IR: polly.loop_preheader193: ; preds = %polly.loop_header186 +; IR-NEXT: br label %polly.loop_header192 +; +; IR: polly.loop_header198: ; preds = %polly.loop_exit206, %polly.loop_preheader199 +; IR-NEXT: %polly.indvar201 = phi i64 [ 0, %polly.loop_preheader199 ], [ %polly.indvar_next202, %polly.loop_exit206 ] +; IR-NEXT: br label %polly.loop_preheader205 +; +; IR: polly.loop_exit206: ; preds = %polly.loop_exit212 +; IR-NEXT: %polly.indvar_next202 = add nsw i64 %polly.indvar201, 1 +; IR-NEXT: %polly.loop_cond203 = icmp sle i64 %polly.indvar201, 30 +; IR-NEXT: br i1 %polly.loop_cond203, label %polly.loop_header198, label %polly.loop_exit200 +; +; IR: polly.loop_preheader199: ; preds = %polly.loop_header192 +; IR-NEXT: br label %polly.loop_header198 +; +; IR: polly.loop_header204: ; preds = %polly.loop_exit212, %polly.loop_preheader205 +; IR-NEXT: %polly.indvar207 = phi i64 [ 0, %polly.loop_preheader205 ], [ %polly.indvar_next208, %polly.loop_exit212 ] +; IR-NEXT: br label %polly.loop_preheader211 +; +; IR: polly.loop_exit212: ; preds = %polly.stmt.for.body8.i +; IR-NEXT: %polly.indvar_next208 = add nsw i64 %polly.indvar207, 1 +; IR-NEXT: %polly.loop_cond209 = icmp sle i64 %polly.indvar207, 30 +; IR-NEXT: br i1 %polly.loop_cond209, label 
%polly.loop_header204, label %polly.loop_exit206 +; +; IR: polly.loop_preheader205: ; preds = %polly.loop_header198 +; IR-NEXT: br label %polly.loop_header204 +; +; IR: polly.loop_header210: ; preds = %polly.stmt.for.body8.i, %polly.loop_preheader211 +; IR-NEXT: %polly.indvar213 = phi i64 [ 0, %polly.loop_preheader211 ], [ %polly.indvar_next214, %polly.stmt.for.body8.i ] +; IR-NEXT: %75 = mul nsw i64 32, %polly.indvar183 +; IR-NEXT: %76 = add nsw i64 %75, %polly.indvar201 +; IR-NEXT: %77 = mul nsw i64 32, %polly.indvar189 +; IR-NEXT: %78 = add nsw i64 %77, %polly.indvar207 +; IR-NEXT: %79 = mul nsw i64 32, %polly.indvar195 +; IR-NEXT: %80 = add nsw i64 %79, %polly.indvar213 +; IR-NEXT: br label %polly.stmt.for.body8.i +; +; IR: polly.stmt.for.body8.i: ; preds = %polly.loop_header210 +; IR-NEXT: %polly.access.cast.216 = bitcast i8* %1 to double* +; IR-NEXT: %81 = mul nsw i64 32, %polly.indvar183 +; IR-NEXT: %82 = add nsw i64 %81, %polly.indvar201 +; IR-NEXT: %polly.access.mul.217 = mul nsw i64 %82, 1024 +; IR-NEXT: %83 = mul nsw i64 32, %polly.indvar189 +; IR-NEXT: %84 = add nsw i64 %83, %polly.indvar207 +; IR-NEXT: %polly.access.add.218 = add nsw i64 %polly.access.mul.217, %84 +; IR-NEXT: %polly.access.219 = getelementptr double, double* %polly.access.cast.216, i64 %polly.access.add.218 +; IR-NEXT: %polly.access.219.reload = load double, double* %polly.access.219, !alias.scope !11, !noalias !17 +; IR-NEXT: %85 = shl i64 %76, 13 +; IR-NEXT: %86 = shl i64 %80, 3 +; IR-NEXT: %87 = add i64 %85, %86 +; IR-NEXT: %scevgep220 = getelementptr i8, i8* %5, i64 %87 +; IR-NEXT: %scevgep220221 = bitcast i8* %scevgep220 to double* +; IR-NEXT: %_p_scalar_222 = load double, double* %scevgep220221, align 8, !alias.scope !16, !noalias !18 +; IR-NEXT: %p_mul13.i = fmul double %_p_scalar_222, 3.241200e+04 +; IR-NEXT: %88 = shl i64 %80, 13 +; IR-NEXT: %89 = shl i64 %78, 3 +; IR-NEXT: %90 = add i64 %88, %89 +; IR-NEXT: %scevgep223 = getelementptr i8, i8* %9, i64 %90 +; IR-NEXT: 
%scevgep223224 = bitcast i8* %scevgep223 to double* +; IR-NEXT: %_p_scalar_225 = load double, double* %scevgep223224, align 8, !alias.scope !8, !noalias !10 +; IR-NEXT: %p_mul18.i = fmul double %p_mul13.i, %_p_scalar_225 +; IR-NEXT: %p_add.i = fadd double %polly.access.219.reload, %p_mul18.i +; IR-NEXT: %91 = shl i64 %76, 13 +; IR-NEXT: %92 = shl i64 %78, 3 +; IR-NEXT: %93 = add i64 %91, %92 +; IR-NEXT: %scevgep226 = getelementptr i8, i8* %1, i64 %93 +; IR-NEXT: %scevgep226227 = bitcast i8* %scevgep226 to double* +; IR-NEXT: store double %p_add.i, double* %scevgep226227, align 8, !alias.scope !11, !noalias !17 +; IR-NEXT: %polly.access.cast.228 = bitcast i8* %1 to double* +; IR-NEXT: %94 = mul nsw i64 32, %polly.indvar183 +; IR-NEXT: %95 = add nsw i64 %94, %polly.indvar201 +; IR-NEXT: %polly.access.mul.229 = mul nsw i64 %95, 1024 +; IR-NEXT: %96 = mul nsw i64 32, %polly.indvar189 +; IR-NEXT: %97 = add nsw i64 %96, %polly.indvar207 +; IR-NEXT: %polly.access.add.230 = add nsw i64 %polly.access.mul.229, %97 +; IR-NEXT: %polly.access.231 = getelementptr double, double* %polly.access.cast.228, i64 %polly.access.add.230 +; IR-NEXT: store double %p_add.i, double* %polly.access.231, !alias.scope !11, !noalias !17 +; IR-NEXT: %polly.indvar_next214 = add nsw i64 %polly.indvar213, 1 +; IR-NEXT: %polly.loop_cond215 = icmp sle i64 %polly.indvar213, 30 +; IR-NEXT: br i1 %polly.loop_cond215, label %polly.loop_header210, label %polly.loop_exit212 +; +; IR: polly.loop_preheader211: ; preds = %polly.loop_header204 +; IR-NEXT: br label %polly.loop_header210 +; +; IR: polly.start234: ; preds = %polly.split_new_and_old232 +; IR-NEXT: br label %polly.loop_preheader237 +; +; IR: polly.loop_exit238: ; preds = %polly.stmt.for.body6.i +; IR-NEXT: br label %polly.exiting235 +; +; IR: polly.exiting235: ; preds = %polly.loop_exit238 +; IR-NEXT: br label %polly.merge_new_and_old233 +; +; IR: polly.loop_header236: ; preds = %polly.stmt.for.body6.i, %polly.loop_preheader237 +; IR-NEXT: 
%polly.indvar239 = phi i64 [ 0, %polly.loop_preheader237 ], [ %polly.indvar_next240, %polly.stmt.for.body6.i ] +; IR-NEXT: br label %polly.stmt.for.body6.i +; +; IR: polly.stmt.for.body6.i: ; preds = %polly.loop_header236 +; IR-NEXT: %98 = shl i64 %polly.indvar239, 3 +; IR-NEXT: %scevgep243 = getelementptr i8, i8* %scevgep242, i64 %98 +; IR-NEXT: %scevgep243244 = bitcast i8* %scevgep243 to i64* +; IR-NEXT: %_p_scalar_245 = load i64, i64* %scevgep243244, align 8, !alias.scope !19, !noalias !21 +; IR-NEXT: %p_block.sroa.0.0.extract.trunc138.i.i = trunc i64 %_p_scalar_245 to i8 +; IR-NEXT: %p_and.i.i = and i8 %p_block.sroa.0.0.extract.trunc138.i.i, 15 +; IR-NEXT: %p_add.i.i = or i8 %p_and.i.i, 48 +; IR-NEXT: %99 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep246 = getelementptr i8, i8* %call.i, i64 %99 +; IR-NEXT: store i8 %p_add.i.i, i8* %scevgep246, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep248 = getelementptr i8, i8* %scevgep247, i64 %99 +; IR-NEXT: store i8 %p_add.i.i, i8* %scevgep248, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.1.extract.shift.i.i = lshr i64 %_p_scalar_245, 8 +; IR-NEXT: %p_conv13195.i.i = trunc i64 %p_block.sroa.0.1.extract.shift.i.i to i8 +; IR-NEXT: %p_and14.i.i = and i8 %p_conv13195.i.i, 15 +; IR-NEXT: %p_add15.i.i = or i8 %p_and14.i.i, 48 +; IR-NEXT: %100 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep250 = getelementptr i8, i8* %scevgep249, i64 %100 +; IR-NEXT: store i8 %p_add15.i.i, i8* %scevgep250, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep252 = getelementptr i8, i8* %scevgep251, i64 %100 +; IR-NEXT: store i8 %p_add15.i.i, i8* %scevgep252, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.2.extract.shift.i.i = lshr i64 %_p_scalar_245, 16 +; IR-NEXT: %p_conv31201.i.i = trunc i64 %p_block.sroa.0.2.extract.shift.i.i to i8 +; IR-NEXT: %p_and32.i.i = and i8 %p_conv31201.i.i, 15 +; IR-NEXT: %p_add33.i.i = or i8 %p_and32.i.i, 48 +; IR-NEXT: %101 = shl i64 
%polly.indvar239, 4 +; IR-NEXT: %scevgep254 = getelementptr i8, i8* %scevgep253, i64 %101 +; IR-NEXT: store i8 %p_add33.i.i, i8* %scevgep254, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep256 = getelementptr i8, i8* %scevgep255, i64 %101 +; IR-NEXT: store i8 %p_add33.i.i, i8* %scevgep256, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_.tr.i.i = trunc i64 %_p_scalar_245 to i32 +; IR-NEXT: %p_sext204207.i.i = lshr i32 %p_.tr.i.i, 24 +; IR-NEXT: %p_and50.i.i = and i32 %p_sext204207.i.i, 15 +; IR-NEXT: %p_add51.i.i = or i32 %p_and50.i.i, 48 +; IR-NEXT: %p_conv52.i.i = trunc i32 %p_add51.i.i to i8 +; IR-NEXT: %102 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep258 = getelementptr i8, i8* %scevgep257, i64 %102 +; IR-NEXT: store i8 %p_conv52.i.i, i8* %scevgep258, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep260 = getelementptr i8, i8* %scevgep259, i64 %102 +; IR-NEXT: store i8 %p_conv52.i.i, i8* %scevgep260, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.4.extract.shift.i.i = lshr i64 %_p_scalar_245, 32 +; IR-NEXT: %p_conv67211.i.i = trunc i64 %p_block.sroa.0.4.extract.shift.i.i to i8 +; IR-NEXT: %p_and68.i.i = and i8 %p_conv67211.i.i, 15 +; IR-NEXT: %p_add69.i.i = or i8 %p_and68.i.i, 48 +; IR-NEXT: %103 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep262 = getelementptr i8, i8* %scevgep261, i64 %103 +; IR-NEXT: store i8 %p_add69.i.i, i8* %scevgep262, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep264 = getelementptr i8, i8* %scevgep263, i64 %103 +; IR-NEXT: store i8 %p_add69.i.i, i8* %scevgep264, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.5.extract.shift.i.i = lshr i64 %_p_scalar_245, 40 +; IR-NEXT: %p_conv85217.i.i = trunc i64 %p_block.sroa.0.5.extract.shift.i.i to i8 +; IR-NEXT: %p_and86.i.i = and i8 %p_conv85217.i.i, 15 +; IR-NEXT: %p_add87.i.i = or i8 %p_and86.i.i, 48 +; IR-NEXT: %104 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep266 = getelementptr i8, i8* 
%scevgep265, i64 %104 +; IR-NEXT: store i8 %p_add87.i.i, i8* %scevgep266, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep268 = getelementptr i8, i8* %scevgep267, i64 %104 +; IR-NEXT: store i8 %p_add87.i.i, i8* %scevgep268, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.6.extract.shift.i.i = lshr i64 %_p_scalar_245, 48 +; IR-NEXT: %p_conv103223.i.i = trunc i64 %p_block.sroa.0.6.extract.shift.i.i to i8 +; IR-NEXT: %p_and104.i.i = and i8 %p_conv103223.i.i, 15 +; IR-NEXT: %p_add105.i.i = or i8 %p_and104.i.i, 48 +; IR-NEXT: %105 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep270 = getelementptr i8, i8* %scevgep269, i64 %105 +; IR-NEXT: store i8 %p_add105.i.i, i8* %scevgep270, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep272 = getelementptr i8, i8* %scevgep271, i64 %105 +; IR-NEXT: store i8 %p_add105.i.i, i8* %scevgep272, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %p_block.sroa.0.7.extract.shift.i.i = lshr i64 %_p_scalar_245, 56 +; IR-NEXT: %p_conv121229.i.i = trunc i64 %p_block.sroa.0.7.extract.shift.i.i to i8 +; IR-NEXT: %p_and122.i.i = and i8 %p_conv121229.i.i, 15 +; IR-NEXT: %p_add123.i.i = or i8 %p_and122.i.i, 48 +; IR-NEXT: %106 = shl i64 %polly.indvar239, 4 +; IR-NEXT: %scevgep274 = getelementptr i8, i8* %scevgep273, i64 %106 +; IR-NEXT: store i8 %p_add123.i.i, i8* %scevgep274, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %scevgep276 = getelementptr i8, i8* %scevgep275, i64 %106 +; IR-NEXT: store i8 %p_add123.i.i, i8* %scevgep276, align 1, !alias.scope !22, !noalias !23 +; IR-NEXT: %polly.indvar_next240 = add nsw i64 %polly.indvar239, 1 +; IR-NEXT: %polly.loop_cond241 = icmp sle i64 %polly.indvar239, 1022 +; IR-NEXT: br i1 %polly.loop_cond241, label %polly.loop_header236, label %polly.loop_exit238 +; +; IR: polly.loop_preheader237: ; preds = %polly.start234 +; IR-NEXT: %107 = shl i64 %47, 13 +; IR-NEXT: %scevgep242 = getelementptr i8, i8* %1, i64 %107 +; IR-NEXT: br label %polly.loop_header236 +; 
IR-NEXT: } +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare i32 @posix_memalign(i8**, i64, i64) local_unnamed_addr #2 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2 +; +; IR: ; Function Attrs: noreturn nounwind +; IR-NEXT: declare void @exit(i32) local_unnamed_addr #6 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare noalias i8* @malloc(i64) local_unnamed_addr #2 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_unnamed_addr #2 +; +; IR: ; Function Attrs: nounwind +; IR-NEXT: declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #7 +; +; IR: ; Function Attrs: nounwind readnone +; IR-NEXT: declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #8 +; +; IR: ; Function Attrs: nounwind readnone +; IR-NEXT: declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #8 +; +; IR: attributes #0 = { norecurse nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #1 = { argmemonly nounwind } +; IR-NEXT: attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #3 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #4 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #5 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #6 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; IR-NEXT: attributes #7 = { nounwind } +; IR-NEXT: attributes #8 = { nounwind readnone } +; IR-NEXT: attributes #9 = { cold nounwind } +; IR-NEXT: attributes #10 = { noreturn nounwind } +; +; IR: !llvm.ident = !{!0} +; +; IR: !0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} +; IR-NEXT: !1 = !{!2, !2, i64 0} +; IR-NEXT: !2 = !{!"double", !3, i64 0} +; IR-NEXT: !3 = !{!"omnipotent char", !4, i64 0} +; IR-NEXT: !4 = !{!"Simple C/C++ TBAA"} +; IR-NEXT: !5 = !{!6, !6, i64 0} +; IR-NEXT: !6 = !{!"any pointer", 
!3, i64 0} +; IR-NEXT: !7 = !{!3, !3, i64 0} +; IR-NEXT: !8 = distinct !{!8, !9, !"polly.alias.scope."} +; IR-NEXT: !9 = distinct !{!9, !"polly.alias.scope.domain"} +; IR-NEXT: !10 = !{!11, !12, !13, !14, !15, !16} +; IR-NEXT: !11 = distinct !{!11, !9, !"polly.alias.scope."} +; IR-NEXT: !12 = distinct !{!12, !9, !"polly.alias.scope.conv.i"} +; IR-NEXT: !13 = distinct !{!13, !9, !"polly.alias.scope.conv42.i"} +; IR-NEXT: !14 = distinct !{!14, !9, !"polly.alias.scope."} +; IR-NEXT: !15 = distinct !{!15, !9, !"polly.alias.scope.conv19.i"} +; IR-NEXT: !16 = distinct !{!16, !9, !"polly.alias.scope."} +; IR-NEXT: !17 = !{!8, !12, !13, !14, !15, !16} +; IR-NEXT: !18 = !{!11, !8, !12, !13, !14, !15} +; IR-NEXT: !19 = distinct !{!19, !20, !"polly.alias.scope."} +; IR-NEXT: !20 = distinct !{!20, !"polly.alias.scope.domain"} +; IR-NEXT: !21 = !{!22} +; IR-NEXT: !22 = distinct !{!22, !20, !"polly.alias.scope.call.i"} +; IR-NEXT: !23 = !{!19} Index: test/DeLICM/gemm.ll =================================================================== --- /dev/null +++ test/DeLICM/gemm.ll @@ -0,0 +1,124 @@ +; RUN: opt %loadPolly -basicaa -licm -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; dgemm kernel +; C := alpha*A*B + beta*C +; C[ni][nj] +; A[ni][nk] +; B[nk][nj] + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" + +define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, double* noalias nonnull %C, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %ni.for + +ni.for: + %i = phi i32 [0, %entry], [%i.inc, %ni.inc] + %i.cmp = icmp slt i32 %i, 3 ; %ni + br i1 %i.cmp, label %nj.for, label %ni.exit + + nj.for: + %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc] + %j.cmp = icmp slt i32 %j, 3 ; %nj + br i1 %j.cmp, label %nj_beta, label %nj.exit + + nj_beta: + %c_stride = mul nsw i32 %i, 3; %nj + %c_idx_i = getelementptr inbounds double, double* %C, i32 %c_stride + %c_idx_ij = getelementptr inbounds double, 
double* %c_idx_i, i32 %j + + ; C[i][j] *= beta + ;%c = load double, double* %c_idx_ij + ;%c_beta = fmul double %c, %beta + ;store double %c_beta, double* %c_idx_ij + + ;%c_init = load double, double* %c_idx_ij + br label %nk.for + + nk.for: + ;%c = phi double [%c_init, %nj_beta], [%beta_c_alpha_a_b, %nk.inc] + %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc] + %k.cmp = icmp slt i32 %k, 3 ; %nk + br i1 %k.cmp, label %nk_alpha, label %nk.exit + + nk_alpha: + %a_stride = mul nsw i32 %i, 3; %nk + %a_idx_i = getelementptr inbounds double, double* %A, i32 %a_stride + %a_idx_ik = getelementptr inbounds double, double* %a_idx_i, i32 %k + + %b_stride = mul nsw i32 %k, 3; %nj + %b_idx_k = getelementptr inbounds double, double* %B, i32 %b_stride + %b_idx_kj = getelementptr inbounds double, double* %b_idx_k, i32 %j + + ; C[i][j] += alpha * A[i][k] * B[k][j] + %a = load double, double* %a_idx_ik + %b = load double, double* %b_idx_kj + %beta_c = load double, double* %c_idx_ij + + %alpha_a = fmul double %a, %alpha + %alpha_a_b = fmul double %alpha_a, %b + %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b + + store double %beta_c_alpha_a_b, double* %c_idx_ij + + br label %nk.inc + + nk.inc: + %k.inc = add nuw nsw i32 %k, 1 + br label %nk.for + + nk.exit: + ; store double %c, double* %c_idx_ij + br label %nj.inc + + nj.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %nj.for + + nj.exit: + br label %ni.inc + +ni.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %ni.for + +ni.exit: + br label %return + +return: + ret void +} + + +; [0] Stmt_nj_beta[0, 0] +; [1] Stmt_nk_alpha[0,0,0] +; (2) %c1 +; [2] Stmt_nk_inc[0,0,0] +; (3) %beta_c_alpha_a_b +; [3] Stmt_nk_alpha[0,0,1] + + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> [9i0 + 3i1 + i2] } +; CHECK-NEXT: } +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, 2] -> Val_beta_c_alpha_a_b[{{\]\]}} : i1 >= 4 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 
3o0 <= i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, -1 - 3i0 + i1] -> Val_beta_c_alpha_a_b[{{\]\]}} : 3i0 < i1 <= 3 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 3o0 <= i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, -3i0 + i1] -> Val_beta_c_alpha_a_b[{{\]\]}} : 3i0 <= i1 <= 2 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 3o0 <= i0 } +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, 2] -> Val_beta_c_alpha_a_b[{{\]\]}} : i1 >= 4 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 3o0 <= i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, -1 - 3i0 + i1] -> Val_beta_c_alpha_a_b[{{\]\]}} : 3i0 < i1 <= 3 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 3o0 <= i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_nk_alpha[o0, i0 - 3o0, -3i0 + i1] -> Val_beta_c_alpha_a_b[{{\]\]}} : 3i0 <= i1 <= 2 + 3i0 and 0 <= o0 <= 2 and -2 + i0 <= 3o0 <= i0 } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_nk_alpha +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> MemRef_A[3i0 + i2] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> MemRef_B[i1 + 3i2] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[3i0 + i1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> MemRef_alpha[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[3i0 + i1] }; +; CHECK-NEXT: } Index: test/DeLICM/gemm_complete.ll =================================================================== --- /dev/null +++ test/DeLICM/gemm_complete.ll @@ -0,0 +1,152 @@ +; RUN: opt %loadPolly -basicaa -loop-rotate -licm -polly-scops 
-polly-delicm -analyze < %s | FileCheck %s +; +; dgemm kernel +; C := alpha*A*B + beta*C +; C[ni][nj] +; A[ni][nk] +; B[nk][nj] + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" + +define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, double* noalias nonnull %C, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %ni.for + +ni.for: + %i = phi i32 [0, %entry], [%i.inc, %ni.inc] + %i.cmp = icmp slt i32 %i, %ni + br i1 %i.cmp, label %nj.for, label %ni.exit + + nj.for: + %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc] + %j.cmp = icmp slt i32 %j, %nj + br i1 %j.cmp, label %nj_beta, label %nj.exit + + nj_beta: + %c_stride = mul nsw i32 %i, %nj + %c_idx_i = getelementptr inbounds double, double* %C, i32 %c_stride + %c_idx_ij = getelementptr inbounds double, double* %c_idx_i, i32 %j + + ; C[i][j] *= beta + %c = load double, double* %c_idx_ij + %c_beta = fmul double %c, %beta + store double %c_beta, double* %c_idx_ij + + br label %nk.for + + nk.for: + %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc] + %k.cmp = icmp slt i32 %k, %nk + br i1 %k.cmp, label %nk_alpha, label %nk.exit + + nk_alpha: + %a_stride = mul nsw i32 %i, %nk + %a_idx_i = getelementptr inbounds double, double* %A, i32 %a_stride + %a_idx_ik = getelementptr inbounds double, double* %a_idx_i, i32 %k + + %b_stride = mul nsw i32 %k, %nj + %b_idx_k = getelementptr inbounds double, double* %B, i32 %b_stride + %b_idx_kj = getelementptr inbounds double, double* %b_idx_k, i32 %j + + ; C[i][j] += alpha * A[i][k] * B[k][j] + %a = load double, double* %a_idx_ik + %b = load double, double* %b_idx_kj + %beta_c = load double, double* %c_idx_ij + + %alpha_a = fmul double %a, %alpha + %alpha_a_b = fmul double %alpha_a, %b + %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b + + store double %beta_c_alpha_a_b, double* %c_idx_ij + + br label %nk.inc + + nk.inc: + %k.inc = add nuw nsw i32 %k, 1 + br label %nk.for + + nk.exit: + br label %nj.inc + + nj.inc: + %j.inc = 
add nuw nsw i32 %j, 1 + br label %nj.for + + nj.exit: + br label %ni.inc + +ni.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %ni.for + +ni.exit: + br label %return + +return: + ret void +} + + +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 >= 4; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i4 >= 4; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> 
Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 > 0; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> 
Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i4 > 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i6 > 0 } + Unknown +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj } +; CHECK: Mapped scalars { +; CHECK-NEXT: Scalar access Stmt_nk_for_nk_exit_crit_edge MK_PHI Merge MemRef_beta_c_alpha_a_b_lcssa__phi as %beta_c_alpha_a_b.lcssa 
[new: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }]: +; CHECK-NEXT: Accesses: 2 +; CHECK-NEXT: Target: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj } +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 3, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 < 0; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 2, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 >= nk; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 3, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 < 0; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 2, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 >= nk; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 3, 0, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o4 <= 0; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 2, -1 + nk, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o4 >= 2; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 3, 0, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o4 <= 0; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 2, -1 + nk, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o4 >= 2 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 >= nk; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni 
and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 >= 2; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 >= nk; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 >= 2 } + Undef +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } +; CHECK-NEXT: Scalar access Stmt_nk_alpha MK_Value Define MemRef_beta_c_alpha_a_b as %beta_c_alpha_a_b [new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }]: +; CHECK-NEXT: Accesses: 2 +; CHECK-NEXT: Target: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk } +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 2, i2, 1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> [0, i1, 2, i2, 1] : ni <= 0 
and 0 <= i1 < nj and 0 <= i2 < nk } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } + Undef +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 >= 4; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, 
i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i4 >= 4; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 > 0; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i4 > 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < 
nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and (i6 >= 2 or i6 <= 0); [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and (i5 < 0 or i5 >= nk); [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and (i6 >= 2 or i6 <= 0); [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and (i5 < 0 or i5 >= nk); [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 
1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } + Unknown +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_nj_beta +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_beta[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: Stmt_nk_alpha_lr_ph +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_C[i0, 
i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: Stmt_nk_alpha +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_A[i0, i2] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_B[i2, i1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_alpha[] }; +; CHECK-NEXT: Stmt_nk_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, 
i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Stmt_nk_for_nk_exit_crit_edge +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: } Index: test/DeLICM/incomplete_phi.ll =================================================================== --- /dev/null +++ test/DeLICM/incomplete_phi.ll @@ -0,0 +1,158 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -polly-scops -analyze < %s | FileCheck %s +; +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" + +define void @func(double* noalias nonnull %C) { +entry: + br label %outer.for + + + outer.for: + %i = phi i32 [0, %entry], [%i.inc, %outer.inc] + %i.cmp = icmp slt i32 %i, 3 + br i1 %i.cmp, label %outer.body, label %outer.exit + + outer.body: + br label %inner.for + + inner.for: + %phi = phi double [0.0, %outer.body], [%sum, %inner.inc] + %j = phi i32 [0, %outer.body], [%j.inc, %inner.inc] + %j.cmp = icmp slt i32 %j, 3 + br i1 %j.cmp, label %inner.body, label %inner.exit + + inner.body: + %sum = fadd double %phi, 1.0 + br label %inner.inc + + inner.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %inner.for + + inner.exit: + %C_i = getelementptr inbounds double, double* %C, i32 %i + store double %phi, double* %C_i + br label %outer.inc + + outer.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %outer.for + + outer.exit: + br label %return + + +return: + 
ret void +} + + +; +; 0) Stmt_outer_body[0] +; +; +; 1) Stmt_inner_for[0, 0] +; 2) Stmt_inner_body[0, 0] +; 3) Stmt_inner_inc[0, 0] +; +; 4) Stmt_inner_for[0, 1] +; 5) Stmt_inner_body[0, 1] +; 6) Stmt_inner_inc[0, 1] +; +; 7) Stmt_inner_for[0, 2] +; 8) Stmt_inner_body[0, 2] +; 9) Stmt_inner_inc[0, 2] +; +;10) Stmt_inner_for[0, 3] +; +; +;11) Stmt_inner_exit[0] +;12) Stmt_outer_body[1] +; +; +;13) Stmt_inner_for[1, 0] +;14) Stmt_inner_body[1, 0] +;15) Stmt_inner_inc[1, 0] +; +;16) Stmt_inner_for[1, 1] +;17) Stmt_inner_body[1, 1] +;18) Stmt_inner_inc[1, 1] +; +;19) Stmt_inner_for[1, 2] +;20) Stmt_inner_body[1, 2] +;21) Stmt_inner_inc[1, 2] +; +;22) Stmt_inner_for[1, 3] +; +; +;23) Stmt_inner_exit[1] +;24) Stmt_outer_body[2] +; +;25) Stmt_inner_for[2, 0] +;26) Stmt_inner_body[2, 0] +;27) Stmt_inner_inc[2, 0] +; +;28) Stmt_inner_for[2, 1] +;29) Stmt_inner_body[2, 1] +;30) Stmt_inner_inc[2, 1] +; +;31) Stmt_inner_for[2, 2] +;32) Stmt_inner_body[2, 2] +;33) Stmt_inner_inc[2, 2] +; +;34) Stmt_inner_for[2, 3] +; +; +;35) Stmt_inner_exit[2] + + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_outer_body[i0] -> [12i0] } +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> [3 + 12i0 + 3i1] } +; CHECK-NEXT: { Stmt_inner_exit[i0] -> [11 + 12i0] } +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> [2 + 12i0 + 3i1] } +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> [1 + 12i0 + 3i1] } +; CHECK-NEXT: } +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 2 and i1 <= 11 + 12i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 and i1 >= 12 + 12i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [11 + 12i0{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 } +; CHECK: Mapped scalars { +; CHECK-NEXT: Scalar access Stmt_inner_for MK_Value Define MemRef_phi as %phi [new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, 
i1] -> MemRef_C[0] : 0 <= i1 <= 3 }]: +; CHECK-NEXT: Accesses: 3 +; CHECK-NEXT: Target: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 } +; CHECK-NEXT: Lifetime: { Stmt_inner_for[i0, i1] -> [2 + 12i0 + 3i1] : 0 <= i0 <= 2 and 0 <= i1 <= 4 - i0 and i1 <= 3; Stmt_inner_for[2, 3] -> [35] } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 12i0 + i1 and i0 <= 2 and i1 >= 2 + 12i0 and 14 <= i1 <= 14 + 9i0 and i1 <= 11 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 11; [MemRef_C[2] -> [35{{\]\]}} -> [Stmt_inner_for[2, 3] -> Val_phi[{{\]\]}} } + Undef +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 12i0 + i1 and i0 <= 2 and i1 > 12i0 and 13 <= i1 <= 10 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 + i1 and 0 < i1 <= 10 } +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 2 and i1 <= 11 + 12i0 and ((i1 <= 34 and 3*floor((-2 + i1)/3) <= -3 + i1) or (3*floor((-2 + i1)/3) = -2 + i1 and i1 <= 1 + 12i0)); [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 12i0 + i1 and i0 <= 2 and i1 >= 2 + 12i0 and 14 <= i1 <= 14 + 9i0 and i1 <= 11 + 12i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 and i1 >= 12 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 11; [MemRef_C[2] -> [35{{\]\]}} -> [Stmt_inner_for[2, 3] -> Val_phi[{{\]\]}} } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 12i0 + i1 and i0 <= 2 and i1 > 12i0 and 13 <= i1 <= 10 + 
12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 + i1 and 0 < i1 <= 10; [MemRef_C[i0] -> [11 + 12i0{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_outer_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_body[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_inner_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_inner_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i0 <= 2 and i1 >= 0 and 4 - 4i0 <= i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 }; +; CHECK-NEXT: Stmt_inner_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_inner_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_inner_exit[i0] -> MemRef_C[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_exit[i0] -> MemRef_C[i0] : 0 <= i0 <= 2 }; +; CHECK-NEXT: } Index: 
test/DeLICM/licm_potential_store.ll =================================================================== --- /dev/null +++ test/DeLICM/licm_potential_store.ll @@ -0,0 +1,148 @@ +; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ +; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars \ +; RUN: -polly-prepare -polly-delicm -analyze < %s \ +; RUN: \ +; RUN: | FileCheck %s --check-prefix=NOLICM +; +; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ +; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars -licm \ +; RUN: -polly-prepare -polly-delicm -analyze < %s \ +; RUN: \ +; RUN: | FileCheck %s --check-prefix=LICM +; +; void foo(int n, float A[static const restrict n], float x) { +; // (0) +; for (int i = 0; i < 5; i += 1) { +; for (int j = 0; j < n; j += 1) { +; x = 7; // (1) +; } +; A[0] = x; // (3) +; } +; // (4) +; } + + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(i32 %n, float* noalias nonnull %A, float %x) { +entry: + %n.addr = alloca i32, align 4 + %A.addr = alloca float*, align 8 + %x.addr = alloca float, align 4 + %i = alloca i32, align 4 + %j = alloca i32, align 4 + store i32 %n, i32* %n.addr, align 4 + store float* %A, float** %A.addr, align 8 + store float %x, float* %x.addr, align 4 + %tmp = load i32, i32* %n.addr, align 4 + %tmp1 = zext i32 %tmp to i64 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc.4, %entry + %tmp2 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %tmp2, 5 + br i1 %cmp, label %for.body, label %for.end.6 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %tmp3 = load i32, i32* %j, align 4 + %tmp4 = load i32, i32* %n.addr, align 4 + %cmp2 = icmp slt i32 %tmp3, %tmp4 + br i1 %cmp2, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + store float 7.000000e+00, float* %x.addr, align 4 
+ br label %for.inc + +for.inc: ; preds = %for.body.3 + %tmp5 = load i32, i32* %j, align 4 + %add = add nsw i32 %tmp5, 1 + store i32 %add, i32* %j, align 4 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + %tmp6 = load float, float* %x.addr, align 4 + %tmp7 = load float*, float** %A.addr, align 8 + %arrayidx = getelementptr inbounds float, float* %tmp7, i64 0 + store float %tmp6, float* %arrayidx, align 4 + br label %for.inc.4 + +for.inc.4: ; preds = %for.end + %tmp8 = load i32, i32* %i, align 4 + %add5 = add nsw i32 %tmp8, 1 + store i32 %add5, i32* %i, align 4 + br label %for.cond + +for.end.6: ; preds = %for.cond + ret void +} + +; CHECK: Statements { +; CHECK: Stmt_for_end +; CHECK: } + + +; NOLICM: Original zone: +; NOLICM-NEXT: Lifetime: [n] -> { [MemRef_A[0] -> [i1, i2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i1 >= 5; [MemRef_A[0] -> [4, i2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i2 >= 3; [MemRef_A[0] -> [4, 2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i3 > 0; [MemRef_A[0] -> [i1, i2, i3{{\]\]}} -> Undef[] : i1 <= 4 and (i1 <= 3 or (i1 >= 0 and i2 <= 1)); [MemRef_A[0] -> [i1, 2, i3{{\]\]}} -> Undef[] : 0 <= i1 <= 4 and i3 <= 0 } + Unknown +; NOLICM-NEXT: Written : [n] -> { [MemRef_A[0] -> [i1, 2, 0{{\]\]}} -> [Stmt_for_end[i1] -> Val_x_addr_1_lcssa[{{\]\]}} : 0 <= i1 <= 4 } +; NOLICM: Mapped scalars { +; NOLICM-NEXT: } +; NOLICM: After zone: +; NOLICM-NEXT: Lifetime: [n] -> { [MemRef_A[0] -> [i1, i2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i1 >= 5; [MemRef_A[0] -> [4, i2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i2 >= 3; [MemRef_A[0] -> [4, 2, i3{{\]\]}} -> [Stmt_for_end[4] -> Val_x_addr_1_lcssa[{{\]\]}} : i3 > 0; [MemRef_A[0] -> [i1, i2, i3{{\]\]}} -> Undef[] : i1 <= 4 and (i1 <= 3 or (i1 >= 0 and i2 <= 1)); [MemRef_A[0] -> [i1, 2, i3{{\]\]}} -> Undef[] : 0 <= i1 <= 4 and i3 <= 0 } + Unknown +; NOLICM-NEXT: Written : [n] -> { 
[MemRef_A[0] -> [i1, 2, 0{{\]\]}} -> [Stmt_for_end[i1] -> Val_x_addr_1_lcssa[{{\]\]}} : 0 <= i1 <= 4 } +; NOLICM: After Statements { +; NOLICM-NEXT: Stmt_for_cond_1_preheader +; NOLICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_cond_1_preheader[i0] -> MemRef_x_addr_04__phi[] }; +; NOLICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_cond_1_preheader[i0] -> MemRef_x_addr_1__phi[] }; +; NOLICM-NEXT: Stmt_for_cond_1 +; NOLICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1__phi[] }; +; NOLICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1__phi[] }; +; NOLICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1_lcssa__phi[] }; +; NOLICM-NEXT: Stmt_for_end +; NOLICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_x_addr_04__phi[] }; +; NOLICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; NOLICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_x_addr_1_lcssa__phi[] }; +; NOLICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; NOLICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_A[0] }; +; NOLICM-NEXT: } + +; LICM: Original zone: +; LICM-NEXT: Lifetime: [n] -> { } + Unknown +; LICM-NEXT: Written : [n] -> { } +; LICM: Mapped scalars { +; LICM-NEXT: } +; LICM: After zone: +; LICM-NEXT: Lifetime: [n] -> { } + Unknown +; LICM-NEXT: Written : [n] -> { } +; LICM: After Statements { +; LICM-NEXT: Stmt_for_cond_1_preheader +; LICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_cond_1_preheader[i0] -> MemRef_x_addr_04__phi[] }; +; LICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { 
Stmt_for_cond_1_preheader[i0] -> MemRef_x_addr_1__phi[] }; +; LICM-NEXT: Stmt_for_cond_1 +; LICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1__phi[] }; +; LICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1__phi[] }; +; LICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_cond_1[i0, i1] -> MemRef_x_addr_1_lcssa__phi[] }; +; LICM-NEXT: Stmt_for_end +; LICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_x_addr_04__phi[] }; +; LICM-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_x_addr_1_lcssa__phi[] }; +; LICM-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; LICM-NEXT: [n] -> { Stmt_for_end[i0] -> MemRef_x_addr_1_lcssa[] }; +; LICM-NEXT: } Index: test/DeLICM/licm_reduction_nested.ll =================================================================== --- test/DeLICM/licm_reduction_nested.ll +++ test/DeLICM/licm_reduction_nested.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s --check-prefix=NOLICM +; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s --check-prefix=LICM ; ; XFAIL: * ; @@ -55,14 +55,3 @@ for.end.8: ; preds = %for.cond ret void } - - -; CHECK: Statements { -; CHECK: Stmt_for_body_3 -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body_3[i0, i1] -> MemRef_B[i0 + i1] }; -; CHECK-DAG: ReadAccess := [Reduction Type: +] 
[Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body_3[i0, i1] -> MemRef_A[j] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: +] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body_3[i0, i1] -> MemRef_A[j] }; -; CHECK: } Index: test/DeLICM/licm_store.ll =================================================================== --- /dev/null +++ test/DeLICM/licm_store.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s --check-prefix=NOLICM +; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s --check-prefix=LICM +; +; XFAIL: * +; +; void foo(float *restrict A, float *restrict B, long j) { +; for (long i = 0; i < 100; i++) +; A[j] = B[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(float* noalias %A, float* noalias %B, i64 %j) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds float, float* %B, i64 %i.0 + %tmp = bitcast float* %arrayidx to i32* + %tmp1 = load i32, i32* %tmp, align 4 + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %j + %tmp2 = bitcast float* %arrayidx1 to i32* + store i32 %tmp1, i32* %tmp2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nuw nsw i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; CHECK: Printing analysis 'Basic Alias Analysis (stateless AA impl)' for function 'foo': +; CHECK-NEXT: Pass::print not implemented for pass: 'Basic Alias Analysis (stateless AA impl)'! +; CHECK-NEXT: Printing analysis 'Rotate Loops': +; CHECK-NEXT: Pass::print not implemented for pass: 'Rotate Loops'! 
+; CHECK-NEXT: Printing analysis 'Induction Variable Simplification': +; CHECK-NEXT: Pass::print not implemented for pass: 'Induction Variable Simplification'! +; CHECK-NEXT: Printing analysis 'Polly - Prepare code for polly' for function 'foo': +; CHECK-NEXT: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.body => for.end' in function 'foo': +; CHECK-NEXT: Function: foo +; CHECK-NEXT: Region: %for.body---%for.end +; CHECK-NEXT: Max Loop Depth: 1 +; CHECK-NEXT: Invariant Accesses: { +; CHECK-NEXT: } +; CHECK-NEXT: Context: +; CHECK-NEXT: [j] -> { : -9223372036854775808 <= j <= 9223372036854775807 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [j] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [j] -> { : 1 = 0 } +; CHECK-NEXT: p0: %j +; CHECK-NEXT: Arrays { +; CHECK-NEXT: i32 MemRef_B[*]; // Element size 4 +; CHECK-NEXT: i32 MemRef_A[*]; // Element size 4 +; CHECK-NEXT: } +; CHECK-NEXT: Arrays (Bounds as pw_affs) { +; CHECK-NEXT: i32 MemRef_B[*]; // Element size 4 +; CHECK-NEXT: i32 MemRef_A[*]; // Element size 4 +; CHECK-NEXT: } +; CHECK-NEXT: Alias Groups (0): +; CHECK-NEXT: n/a +; CHECK-NEXT: Statements { +; CHECK-NEXT: Stmt_for_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [j] -> { Stmt_for_body[i0] : 0 <= i0 <= 99 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_A[j] }; +; CHECK-NEXT: } +; CHECK-NEXT: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'entry => ' in function 'foo': +; CHECK-NEXT: Invalid Scop! 
Index: test/DeLICM/macroblock0.ll =================================================================== --- /dev/null +++ test/DeLICM/macroblock0.ll @@ -0,0 +1,162 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/JM/ldecod/macroblock.c + +; ModuleID = '/tmp/bugpoint-il6f3dtt/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-4da5cc1.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120 = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.Slice.10.120.318.384.538.736.934.1132.1242.1440.1682.1880.2034.2144.2298.2496.2760.2804.2826.2848.2870.2914.2936.2958.2980.3024.3046.3068.3090.3134.3156.3178.3244.3266.3332.3354.3376.3464.3508.3574.3596.3618.3684.3772.3838.3904.3926.3970.3992.4014.4102.4388.4454.4630.4696.4784.4960.5114.5312.5554.5620.5796.5862.5906.6148.6456.6478.7182.7204.7226.7248.7270.7292.8436.8480.8502.8524.8568.8854.8876.9096.9116*, 
%struct.macroblock.11.121.319.385.539.737.935.1133.1243.1441.1683.1881.2035.2145.2299.2497.2761.2805.2827.2849.2871.2915.2937.2959.2981.3025.3047.3069.3091.3135.3157.3179.3245.3267.3333.3355.3377.3465.3509.3575.3597.3619.3685.3773.3839.3905.3927.3971.3993.4015.4103.4389.4455.4631.4697.4785.4961.5115.5313.5555.5621.5797.5863.5907.6149.6457.6479.7183.7205.7227.7249.7271.7293.8437.8481.8503.8525.8569.8855.8877.9097.9117*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.0.110.308.374.528.726.924.1122.1232.1430.1672.1870.2024.2134.2288.2486.2750.2794.2816.2838.2860.2904.2926.2948.2970.3014.3036.3058.3080.3124.3146.3168.3234.3256.3322.3344.3366.3454.3498.3564.3586.3608.3674.3762.3828.3894.3916.3960.3982.4004.4092.4378.4444.4620.4686.4774.4950.5104.5302.5544.5610.5786.5852.5896.6138.6446.6468.7172.7194.7216.7238.7260.7282.8426.8470.8492.8514.8558.8844.8866.9086.9118*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, %struct.timeb.12.122.320.386.540.738.936.1134.1244.1442.1684.1882.2036.2146.2300.2498.2762.2806.2828.2850.2872.2916.2938.2960.2982.3026.3048.3070.3092.3136.3158.3180.3246.3268.3334.3356.3378.3466.3510.3576.3598.3620.3686.3774.3840.3906.3928.3972.3994.4016.4104.4390.4456.4632.4698.4786.4962.5116.5314.5556.5622.5798.5864.5908.6150.6458.6480.7184.7206.7228.7250.7272.7294.8438.8482.8504.8526.8570.8856.8878.9098.9119, 
%struct.timeb.12.122.320.386.540.738.936.1134.1244.1442.1684.1882.2036.2146.2300.2498.2762.2806.2828.2850.2872.2916.2938.2960.2982.3026.3048.3070.3092.3136.3158.3180.3246.3268.3334.3356.3378.3466.3510.3576.3598.3620.3686.3774.3840.3906.3928.3972.3994.4016.4104.4390.4456.4632.4698.4786.4962.5116.5314.5556.5622.5798.5864.5908.6150.6458.6480.7184.7206.7228.7250.7272.7294.8438.8482.8504.8526.8570.8856.8878.9098.9119, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.Slice.10.120.318.384.538.736.934.1132.1242.1440.1682.1880.2034.2144.2298.2496.2760.2804.2826.2848.2870.2914.2936.2958.2980.3024.3046.3068.3090.3134.3156.3178.3244.3266.3332.3354.3376.3464.3508.3574.3596.3618.3684.3772.3838.3904.3926.3970.3992.4014.4102.4388.4454.4630.4696.4784.4960.5114.5312.5554.5620.5796.5862.5906.6148.6456.6478.7182.7204.7226.7248.7270.7292.8436.8480.8502.8524.8568.8854.8876.9096.9116 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.datapartition.5.115.313.379.533.731.929.1127.1237.1435.1677.1875.2029.2139.2293.2491.2755.2799.2821.2843.2865.2909.2931.2953.2975.3019.3041.3063.3085.3129.3151.3173.3239.3261.3327.3349.3371.3459.3503.3569.3591.3613.3679.3767.3833.3899.3921.3965.3987.4009.4097.4383.4449.4625.4691.4779.4955.5109.5307.5549.5615.5791.5857.5901.6143.6451.6473.7177.7199.7221.7243.7265.7287.8431.8475.8497.8519.8563.8849.8871.9091.9111*, %struct.MotionInfoContexts.7.117.315.381.535.733.931.1129.1239.1437.1679.1877.2031.2141.2295.2493.2757.2801.2823.2845.2867.2911.2933.2955.2977.3021.3043.3065.3087.3131.3153.3175.3241.3263.3329.3351.3373.3461.3505.3571.3593.3615.3681.3769.3835.3901.3923.3967.3989.4011.4099.4385.4451.4627.4693.4781.4957.5111.5309.5551.5617.5793.5859.5903.6145.6453.6475.7179.7201.7223.7245.7267.7289.8433.8477.8499.8521.8565.8851.8873.9093.9113*, 
%struct.TextureInfoContexts.8.118.316.382.536.734.932.1130.1240.1438.1680.1878.2032.2142.2296.2494.2758.2802.2824.2846.2868.2912.2934.2956.2978.3022.3044.3066.3088.3132.3154.3176.3242.3264.3330.3352.3374.3462.3506.3572.3594.3616.3682.3770.3836.3902.3924.3968.3990.4012.4100.4386.4452.4628.4694.4782.4958.5112.5310.5552.5618.5794.5860.5904.6146.6454.6476.7180.7202.7224.7246.7268.7290.8434.8478.8500.8522.8566.8852.8874.9094.9114*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120*, %struct.inp_par.9.119.317.383.537.735.933.1131.1241.1439.1681.1879.2033.2143.2297.2495.2759.2803.2825.2847.2869.2913.2935.2957.2979.3023.3045.3067.3089.3133.3155.3177.3243.3265.3331.3353.3375.3463.3507.3573.3595.3617.3683.3771.3837.3903.3925.3969.3991.4013.4101.4387.4453.4629.4695.4783.4959.5113.5311.5553.5619.5795.5861.5905.6147.6455.6477.7181.7203.7225.7247.7269.7291.8435.8479.8501.8523.8567.8853.8875.9095.9115*)*, i32, i32, i32, i32 } +%struct.datapartition.5.115.313.379.533.731.929.1127.1237.1435.1677.1875.2029.2139.2293.2491.2755.2799.2821.2843.2865.2909.2931.2953.2975.3019.3041.3063.3085.3129.3151.3173.3239.3261.3327.3349.3371.3459.3503.3569.3591.3613.3679.3767.3833.3899.3921.3965.3987.4009.4097.4383.4449.4625.4691.4779.4955.5109.5307.5549.5615.5791.5857.5901.6143.6451.6473.7177.7199.7221.7243.7265.7287.8431.8475.8497.8519.8563.8849.8871.9091.9111 = type { 
%struct.Bitstream.2.112.310.376.530.728.926.1124.1234.1432.1674.1872.2026.2136.2290.2488.2752.2796.2818.2840.2862.2906.2928.2950.2972.3016.3038.3060.3082.3126.3148.3170.3236.3258.3324.3346.3368.3456.3500.3566.3588.3610.3676.3764.3830.3896.3918.3962.3984.4006.4094.4380.4446.4622.4688.4776.4952.5106.5304.5546.5612.5788.5854.5898.6140.6448.6470.7174.7196.7218.7240.7262.7284.8428.8472.8494.8516.8560.8846.8868.9088.9108*, %struct.DecodingEnvironment.3.113.311.377.531.729.927.1125.1235.1433.1675.1873.2027.2137.2291.2489.2753.2797.2819.2841.2863.2907.2929.2951.2973.3017.3039.3061.3083.3127.3149.3171.3237.3259.3325.3347.3369.3457.3501.3567.3589.3611.3677.3765.3831.3897.3919.3963.3985.4007.4095.4381.4447.4623.4689.4777.4953.5107.5305.5547.5613.5789.5855.5899.6141.6449.6471.7175.7197.7219.7241.7263.7285.8429.8473.8495.8517.8561.8847.8869.9089.9109, i32 (%struct.syntaxelement.4.114.312.378.532.730.928.1126.1236.1434.1676.1874.2028.2138.2292.2490.2754.2798.2820.2842.2864.2908.2930.2952.2974.3018.3040.3062.3084.3128.3150.3172.3238.3260.3326.3348.3370.3458.3502.3568.3590.3612.3678.3766.3832.3898.3920.3964.3986.4008.4096.4382.4448.4624.4690.4778.4954.5108.5306.5548.5614.5790.5856.5900.6142.6450.6472.7176.7198.7220.7242.7264.7286.8430.8474.8496.8518.8562.8848.8870.9090.9110*, %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120*, 
%struct.datapartition.5.115.313.379.533.731.929.1127.1237.1435.1677.1875.2029.2139.2293.2491.2755.2799.2821.2843.2865.2909.2931.2953.2975.3019.3041.3063.3085.3129.3151.3173.3239.3261.3327.3349.3371.3459.3503.3569.3591.3613.3679.3767.3833.3899.3921.3965.3987.4009.4097.4383.4449.4625.4691.4779.4955.5109.5307.5549.5615.5791.5857.5901.6143.6451.6473.7177.7199.7221.7243.7265.7287.8431.8475.8497.8519.8563.8849.8871.9091.9111*)* } +%struct.Bitstream.2.112.310.376.530.728.926.1124.1234.1432.1674.1872.2026.2136.2290.2488.2752.2796.2818.2840.2862.2906.2928.2950.2972.3016.3038.3060.3082.3126.3148.3170.3236.3258.3324.3346.3368.3456.3500.3566.3588.3610.3676.3764.3830.3896.3918.3962.3984.4006.4094.4380.4446.4622.4688.4776.4952.5106.5304.5546.5612.5788.5854.5898.6140.6448.6470.7174.7196.7218.7240.7262.7284.8428.8472.8494.8516.8560.8846.8868.9088.9108 = type { i32, i32, i32, i32, i8*, i32 } +%struct.DecodingEnvironment.3.113.311.377.531.729.927.1125.1235.1433.1675.1873.2027.2137.2291.2489.2753.2797.2819.2841.2863.2907.2929.2951.2973.3017.3039.3061.3083.3127.3149.3171.3237.3259.3325.3347.3369.3457.3501.3567.3589.3611.3677.3765.3831.3897.3919.3963.3985.4007.4095.4381.4447.4623.4689.4777.4953.5107.5305.5547.5613.5789.5855.5899.6141.6449.6471.7175.7197.7219.7241.7263.7285.8429.8473.8495.8517.8561.8847.8869.9089.9109 = type { i32, i32, i32, i32, i32, i8*, i32* } +%struct.syntaxelement.4.114.312.378.532.730.928.1126.1236.1434.1676.1874.2028.2138.2292.2490.2754.2798.2820.2842.2864.2908.2930.2952.2974.3018.3040.3062.3084.3128.3150.3172.3238.3260.3326.3348.3370.3458.3502.3568.3590.3612.3678.3766.3832.3898.3920.3964.3986.4008.4096.4382.4448.4624.4690.4778.4954.5108.5306.5548.5614.5790.5856.5900.6142.6450.6472.7176.7198.7220.7242.7264.7286.8430.8474.8496.8518.8562.8848.8870.9090.9110 = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void 
(%struct.syntaxelement.4.114.312.378.532.730.928.1126.1236.1434.1676.1874.2028.2138.2292.2490.2754.2798.2820.2842.2864.2908.2930.2952.2974.3018.3040.3062.3084.3128.3150.3172.3238.3260.3326.3348.3370.3458.3502.3568.3590.3612.3678.3766.3832.3898.3920.3964.3986.4008.4096.4382.4448.4624.4690.4778.4954.5108.5306.5548.5614.5790.5856.5900.6142.6450.6472.7176.7198.7220.7242.7264.7286.8430.8474.8496.8518.8562.8848.8870.9090.9110*, %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120*, %struct.DecodingEnvironment.3.113.311.377.531.729.927.1125.1235.1433.1675.1873.2027.2137.2291.2489.2753.2797.2819.2841.2863.2907.2929.2951.2973.3017.3039.3061.3083.3127.3149.3171.3237.3259.3325.3347.3369.3457.3501.3567.3589.3611.3677.3765.3831.3897.3919.3963.3985.4007.4095.4381.4447.4623.4689.4777.4953.5107.5305.5547.5613.5789.5855.5899.6141.6449.6471.7175.7197.7219.7241.7263.7285.8429.8473.8495.8517.8561.8847.8869.9089.9109*)* } +%struct.MotionInfoContexts.7.117.315.381.535.733.931.1129.1239.1437.1679.1877.2031.2141.2295.2493.2757.2801.2823.2845.2867.2911.2933.2955.2977.3021.3043.3065.3087.3131.3153.3175.3241.3263.3329.3351.3373.3461.3505.3571.3593.3615.3681.3769.3835.3901.3923.3967.3989.4011.4099.4385.4451.4627.4693.4781.4957.5111.5309.5551.5617.5793.5859.5903.6145.6453.6475.7179.7201.7223.7245.7267.7289.8433.8477.8499.8521.8565.8851.8873.9093.9113 = type { [4 x [11 x 
%struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [2 x [9 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [2 x [10 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [2 x [6 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [4 x 
%struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112], [4 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112], [3 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112] } +%struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112 = type { i16, i8 } 
+%struct.TextureInfoContexts.8.118.316.382.536.734.932.1130.1240.1438.1680.1878.2032.2142.2296.2494.2758.2802.2824.2846.2868.2912.2934.2956.2978.3022.3044.3066.3088.3132.3154.3176.3242.3264.3330.3352.3374.3462.3506.3572.3594.3616.3682.3770.3836.3902.3924.3968.3990.4012.4100.4386.4452.4628.4694.4782.4958.5112.5310.5552.5618.5794.5860.5904.6146.6454.6476.7180.7202.7224.7246.7268.7290.8434.8478.8500.8522.8566.8852.8874.9094.9114 = type { [2 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112], [4 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112], [3 x [4 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [4 x 
%struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [15 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [15 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [5 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [5 x 
%struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [15 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]], [10 x [15 x %struct.BiContextType.6.116.314.380.534.732.930.1128.1238.1436.1678.1876.2030.2140.2294.2492.2756.2800.2822.2844.2866.2910.2932.2954.2976.3020.3042.3064.3086.3130.3152.3174.3240.3262.3328.3350.3372.3460.3504.3570.3592.3614.3680.3768.3834.3900.3922.3966.3988.4010.4098.4384.4450.4626.4692.4780.4956.5110.5308.5550.5616.5792.5858.5902.6144.6452.6474.7178.7200.7222.7244.7266.7288.8432.8476.8498.8520.8564.8850.8872.9092.9112]] } +%struct.inp_par.9.119.317.383.537.735.933.1131.1241.1439.1681.1879.2033.2143.2297.2495.2759.2803.2825.2847.2869.2913.2935.2957.2979.3023.3045.3067.3089.3133.3155.3177.3243.3265.3331.3353.3375.3463.3507.3573.3595.3617.3683.3771.3837.3903.3925.3969.3991.4013.4101.4387.4453.4629.4695.4783.4959.5113.5311.5553.5619.5795.5861.5905.6147.6455.6477.7181.7203.7225.7247.7269.7291.8435.8479.8501.8523.8567.8853.8875.9095.9115 = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 } 
+%struct.macroblock.11.121.319.385.539.737.935.1133.1243.1441.1683.1881.2035.2145.2299.2497.2761.2805.2827.2849.2871.2915.2937.2959.2981.3025.3047.3069.3091.3135.3157.3179.3245.3267.3333.3355.3377.3465.3509.3575.3597.3619.3685.3773.3839.3905.3927.3971.3993.4015.4103.4389.4455.4631.4697.4785.4961.5115.5313.5555.5621.5797.5863.5907.6149.6457.6479.7183.7205.7227.7249.7271.7293.8437.8481.8503.8525.8569.8855.8877.9097.9117 = type { i32, [2 x i32], i32, i32, %struct.macroblock.11.121.319.385.539.737.935.1133.1243.1441.1683.1881.2035.2145.2299.2497.2761.2805.2827.2849.2871.2915.2937.2959.2981.3025.3047.3069.3091.3135.3157.3179.3245.3267.3333.3355.3377.3465.3509.3575.3597.3619.3685.3773.3839.3905.3927.3971.3993.4015.4103.4389.4455.4631.4697.4785.4961.5115.5313.5555.5621.5797.5863.5907.6149.6457.6479.7183.7205.7227.7249.7271.7293.8437.8481.8503.8525.8569.8855.8877.9097.9117*, %struct.macroblock.11.121.319.385.539.737.935.1133.1243.1441.1683.1881.2035.2145.2299.2497.2761.2805.2827.2849.2871.2915.2937.2959.2981.3025.3047.3069.3091.3135.3157.3179.3245.3267.3333.3355.3377.3465.3509.3575.3597.3619.3685.3773.3839.3905.3927.3971.3993.4015.4103.4389.4455.4631.4697.4785.4961.5115.5313.5555.5621.5797.5863.5907.6149.6457.6479.7183.7205.7227.7249.7271.7293.8437.8481.8503.8525.8569.8855.8877.9097.9117*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.DecRefPicMarking_s.0.110.308.374.528.726.924.1122.1232.1430.1672.1870.2024.2134.2288.2486.2750.2794.2816.2838.2860.2904.2926.2948.2970.3014.3036.3058.3080.3124.3146.3168.3234.3256.3322.3344.3366.3454.3498.3564.3586.3608.3674.3762.3828.3894.3916.3960.3982.4004.4092.4378.4444.4620.4686.4774.4950.5104.5302.5544.5610.5786.5852.5896.6138.6446.6468.7172.7194.7216.7238.7260.7282.8426.8470.8492.8514.8558.8844.8866.9086.9118 = type { i32, i32, i32, i32, i32, 
%struct.DecRefPicMarking_s.0.110.308.374.528.726.924.1122.1232.1430.1672.1870.2024.2134.2288.2486.2750.2794.2816.2838.2860.2904.2926.2948.2970.3014.3036.3058.3080.3124.3146.3168.3234.3256.3322.3344.3366.3454.3498.3564.3586.3608.3674.3762.3828.3894.3916.3960.3982.4004.4092.4378.4444.4620.4686.4774.4950.5104.5302.5544.5610.5786.5852.5896.6138.6446.6468.7172.7194.7216.7238.7260.7282.8426.8470.8492.8514.8558.8844.8866.9086.9118* } +%struct.timeb.12.122.320.386.540.738.936.1134.1244.1442.1684.1882.2036.2146.2300.2498.2762.2806.2828.2850.2872.2916.2938.2960.2982.3026.3048.3070.3092.3136.3158.3180.3246.3268.3334.3356.3378.3466.3510.3576.3598.3620.3686.3774.3840.3906.3928.3972.3994.4016.4104.4390.4456.4632.4698.4786.4962.5116.5314.5556.5622.5798.5864.5908.6150.6458.6480.7184.7206.7228.7250.7272.7294.8438.8482.8504.8526.8570.8856.8878.9098.9119 = type { i64, i16, i16, i16 } + +; Function Attrs: nounwind uwtable +define void @readCBPandCoeffsFromNAL(%struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120* %img) local_unnamed_addr #0 { +entry: + %qp_rem_uv = alloca [2 x i32], align 4 + %m6 = alloca [4 x i32], align 16 + %0 = load i32, i32* undef, align 8 + %switch.tableidx = add i32 %0, -9 + %1 = icmp ult i32 %switch.tableidx, 6 + %switch.downshift = lshr i6 -5, undef + %2 = and i6 %switch.downshift, 1 + %switch.masked = icmp ne i6 %2, 0 + %3 = and i1 %1, %switch.masked + switch i32 undef, label %if.end431 [ + i32 10, label %if.then263 + i32 14, label %if.then263 + ] + +land.lhs.true142: ; No predecessors! 
+ unreachable + +if.then263: ; preds = %entry, %entry + unreachable + +if.end431: ; preds = %entry + %arrayidx2163 = getelementptr inbounds [4 x i32], [4 x i32]* %m6, i64 0, i64 0 + br label %for.body1071 + +for.body1071: ; preds = %for.inc2450.for.body1071_crit_edge, %if.end431 + %indvars.iv53 = phi i64 [ 0, %for.inc2450.for.body1071_crit_edge ], [ 4, %if.end431 ] + switch i32 undef, label %if.else1893 [ + i32 1, label %if.then1076 + i32 2, label %if.then1391 + ] + +if.then1076: ; preds = %for.body1071 + unreachable + +if.then1391: ; preds = %for.body1071 + unreachable + +if.else1893: ; preds = %for.body1071 + br i1 undef, label %if.then1903, label %for.cond1947.preheader + +for.cond1947.preheader: ; preds = %if.else1893 + %4 = add nuw nsw i64 0, 7 + br label %for.cond2067.preheader + +if.then1903: ; preds = %if.else1893 + unreachable + +for.cond2143.preheader.lr.ph: ; preds = %for.cond2067.preheader + %arrayidx2195 = getelementptr inbounds [2 x i32], [2 x i32]* %qp_rem_uv, i64 0, i64 0 + %5 = load i32, i32* %arrayidx2195, align 4 + br label %for.cond2143.preheader + +for.cond2067.preheader: ; preds = %for.cond2067.preheader, %for.cond1947.preheader + %indvars.iv55 = phi i64 [ %indvars.iv.next56, %for.cond2067.preheader ], [ %indvars.iv53, %for.cond1947.preheader ] + %arrayidx2077 = getelementptr inbounds %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120, 
%struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120* %img, i64 0, i32 28, i64 0, i64 %indvars.iv55, i64 0, i64 0 + %arrayidx2077.3 = getelementptr inbounds %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120, %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120* %img, i64 0, i32 28, i64 3, i64 %indvars.iv55, i64 0, i64 0 + store i32 undef, i32* %arrayidx2077, align 8 + store i32 undef, i32* %arrayidx2077.3, align 8 + %indvars.iv.next56 = add nuw nsw i64 %indvars.iv55, 1 + %cmp2059.not = icmp slt i64 %indvars.iv55, %4 + br i1 %cmp2059.not, label %for.cond2067.preheader, label %for.cond2143.preheader.lr.ph + +for.cond2143.preheader: ; preds = %for.inc2445, %for.cond2143.preheader.lr.ph + %indvars.iv73 = phi i64 [ %indvars.iv.next74, %for.inc2445 ], [ 0, %for.cond2143.preheader.lr.ph ] + store i32 undef, i32* %arrayidx2163, align 16 + br label %for.body2179 + +for.body2179: ; preds = 
%for.inc2442.for.body2179_crit_edge, %for.cond2143.preheader + %indvars.iv61 = phi i64 [ %indvars.iv.next62, %for.inc2442.for.body2179_crit_edge ], [ 0, %for.cond2143.preheader ] + br i1 %3, label %if.then2188, label %if.else2255 + +if.then2188: ; preds = %for.body2179 + %arrayidx2254 = getelementptr inbounds %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120, %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120* %img, i64 0, i32 28, i64 %indvars.iv73, i64 0, i64 0, i64 0 + store i32 undef, i32* %arrayidx2254, align 8 + br label %for.inc2442 + +if.else2255: ; preds = %for.body2179 + %6 = add nuw nsw i64 %indvars.iv61, 0 + %arrayidx2288 = getelementptr inbounds %struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120, 
%struct.img_par.13.123.321.387.541.739.937.1135.1245.1443.1685.1883.2037.2147.2301.2499.2763.2807.2829.2851.2873.2917.2939.2961.2983.3027.3049.3071.3093.3137.3159.3181.3247.3269.3335.3357.3379.3467.3511.3577.3599.3621.3687.3775.3841.3907.3929.3973.3995.4017.4105.4391.4457.4633.4699.4787.4963.5117.5315.5557.5623.5799.5865.5909.6151.6459.6481.7185.7207.7229.7251.7273.7295.8439.8483.8505.8527.8571.8857.8879.9099.9120* %img, i64 0, i32 28, i64 %indvars.iv73, i64 %6, i64 0, i64 0 + store i32 undef, i32* %arrayidx2288, align 8 + br label %for.inc2442 + +for.inc2442: ; preds = %if.else2255, %if.then2188 + %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1 + %exitcond72 = icmp eq i64 %indvars.iv.next62, 2 + br i1 %exitcond72, label %for.inc2445, label %for.inc2442.for.body2179_crit_edge + +for.inc2442.for.body2179_crit_edge: ; preds = %for.inc2442 + br label %for.body2179 + +for.inc2445: ; preds = %for.inc2442 + %indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1 + %cmp2135.not = icmp sgt i64 %indvars.iv73, 2 + br i1 %cmp2135.not, label %for.inc2450.loopexit, label %for.cond2143.preheader + +for.inc2450.loopexit: ; preds = %for.inc2445 + br i1 undef, label %for.inc2450.for.body1071_crit_edge, label %if.end2453 + +for.inc2450.for.body1071_crit_edge: ; preds = %for.inc2450.loopexit + br label %for.body1071 + +if.end2453: ; preds = %for.inc2450.loopexit + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 277335)"} + + +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: null + Unknown +; CHECK-NEXT: Written : null +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; 
CHECK: After zone: +; CHECK-NEXT: Lifetime: null + Unknown +; CHECK-NEXT: Written : null +; CHECK: After Statements { +; CHECK-NEXT: Stmt_for_cond2067_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [indvars_iv53, p_1, switch_downshift] -> { Stmt_for_cond2067_preheader[i0] -> MemRef_img[602 + 16indvars_iv53 + 16i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [indvars_iv53, p_1, switch_downshift] -> { Stmt_for_cond2067_preheader[i0] -> MemRef_img[1178 + 16indvars_iv53 + 16i0] }; +; CHECK-NEXT: Stmt_for_cond2143_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [indvars_iv53, p_1, switch_downshift] -> { Stmt_for_cond2143_preheader[i0] -> MemRef_m6[0] }; +; CHECK-NEXT: Stmt_if_then2188 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [indvars_iv53, p_1, switch_downshift] -> { Stmt_if_then2188[i0, i1] -> MemRef_img[602 + 192i0] }; +; CHECK-NEXT: Stmt_if_else2255 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [indvars_iv53, p_1, switch_downshift] -> { Stmt_if_else2255[i0, i1] -> MemRef_img[602 + 192i0 + 16i1] }; +; CHECK-NEXT: } Index: test/DeLICM/oggenc.ll =================================================================== --- /dev/null +++ test/DeLICM/oggenc.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/oggenc/oggenc.c + +; ModuleID = '/tmp/bugpoint-6hr85lwk/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-78c5572.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.vorbis_info_floor1.19.199.319.739.799.919.979.1159.1219.1279.1399.1459.1759.1879.1939.1999.2779.2839.2899.2959.4027.4195.4363.4475.4531.4568 = type { i32, [31 x i32], [16 x 
i32], [16 x i32], [16 x i32], [16 x [8 x i32]], i32, [65 x i32], float, float, float, float, float, i32 } + +; Function Attrs: nounwind uwtable +define void @floor1_encode(i32* %post) local_unnamed_addr #0 { +entry: + %0 = load %struct.vorbis_info_floor1.19.199.319.739.799.919.979.1159.1219.1279.1399.1459.1759.1879.1939.1999.2779.2839.2899.2959.4027.4195.4363.4475.4531.4568*, %struct.vorbis_info_floor1.19.199.319.739.799.919.979.1159.1219.1279.1399.1459.1759.1879.1939.1999.2779.2839.2899.2959.4027.4195.4363.4475.4531.4568** undef, align 8 + %1 = load i32, i32* undef, align 4 + %conv = sext i32 %1 to i64 + %mult = getelementptr inbounds %struct.vorbis_info_floor1.19.199.319.739.799.919.979.1159.1219.1279.1399.1459.1759.1879.1939.1999.2779.2839.2899.2959.4027.4195.4363.4475.4531.4568, %struct.vorbis_info_floor1.19.199.319.739.799.919.979.1159.1219.1279.1399.1459.1759.1879.1939.1999.2779.2839.2899.2959.4027.4195.4363.4475.4531.4568* %0, i64 0, i32 6 + br label %for.body + +for.body: ; preds = %sw.epilog, %entry + %arrayidx = getelementptr inbounds i32, i32* %post, i64 0 + %2 = load i32, i32* %arrayidx, align 4 + switch i32 0, label %sw.epilog [ + i32 1, label %sw.bb + i32 2, label %sw.bb4 + i32 3, label %sw.bb6 + i32 4, label %sw.bb7 + ] + +sw.bb: ; preds = %for.body + br label %sw.epilog + +sw.bb4: ; preds = %for.body + br label %sw.epilog + +sw.bb6: ; preds = %for.body + br label %sw.epilog + +sw.bb7: ; preds = %for.body + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb7, %sw.bb6, %sw.bb4, %sw.bb, %for.body + %and10 = and i32 %2, 32768 + store i32 undef, i32* %arrayidx, align 4 + %exitcond17 = icmp eq i64 0, %conv + br i1 %exitcond17, label %for.end, label %for.body + +for.end: ; preds = %sw.epilog + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} Index: test/DeLICM/reduction.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction.ll @@ -0,0 +1,127 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A, int m) { +; double red = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction loop */ +; red += 4.2; +; A[0] = red; +; } + +define void @func(double* noalias nonnull %A, i32 %m) { +entry: + br label %reduction.for + +; Domain: { Stmt_reduction_for[i0] : 0 <= i0 <= 4 }; +; Schedule: { Stmt_reduction_for[i0] -> [3i0] : 0 <= i0 <= 4 }; +; Read: { Stmt_reduction_for[i0] -> MemRef_phi__phi[] }; +; MustWrite: { Stmt_reduction_for[i0] -> MemRef_phi[] }; +reduction.for: + %i = phi i32 [0, %entry], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %entry], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + +; Domain: { Stmt_body[i0] : 0 <= i0 <= 3 }; +; Schedule: { Stmt_body[i0] -> [1 + 3i0] : 0 <= i0 <= 3 }; +; Read: { Stmt_body[i0] -> MemRef_phi[] }; +; MustWrite: { Stmt_body[i0] -> MemRef_add[] }; +body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + +; Domain: { Stmt_reduction_inc[i0] : 0 <= i0 <= 3 }; +; Schedule: { Stmt_reduction_inc[i0] -> [2 + 3i0] : 0 <= i0 <= 3 }; +; Read: { Stmt_reduction_inc[i0] -> MemRef_add[] }; +; MustWrite: { Stmt_reduction_inc[i0] -> MemRef_phi__phi[] }; +reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + +; Domain: { Stmt_reduction_exit[] }; +; Schedule: { Stmt_reduction_exit[] -> [13] }; +; Read: { Stmt_reduction_exit[] -> MemRef_phi[] }; +; MustWrite: { Stmt_reduction_exit[] -> MemRef_A[0] }; +reduction.exit: + store double %phi, 
double* %A + br label %return + +return: + ret void +} + +; [ 0] Stmt_reduction_for[0] +; ( 1) Def: %phi + +; [ 1] body[0] + +; [ 2] Stmt_reduction_inc[0] + +; [ 3] Stmt_reduction_for[1] +; ( 4) Def: %phi + +; [ 4] body[1] + +; [ 5] Stmt_reduction_inc[1] + +; [ 6] Stmt_reduction_for[2] +; ( 7) Def: %phi + +; [ 7] body[2] + +; [ 8] Stmt_reduction_inc[2] + +; [ 9] Stmt_reduction_for[3] +; (10) Def: %phi + +; [10] body[3] + +; [11] Stmt_reduction_inc[3] + + +; [12] Stmt_reduction_for[4] +; (13) Def: %phi + +; (< 13) Overwritten: A[0] (???) +; (< 13) NoUse: A[0] (???) + +; [13] reduction.exit[] +; MustOverwrite: A[0] +; (14) Known: A[0] = %phi + + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_body[i0] -> [1 + 3i0] } +; CHECK-NEXT: { Stmt_reduction_inc[i0] -> [2 + 3i0] } +; CHECK-NEXT: { Stmt_reduction_for[i0] -> [3i0] } +; CHECK-NEXT: { Stmt_reduction_exit[] -> [13] } +; CHECK-NEXT: } +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[0] -> [i1{{\]\]}} -> [Stmt_reduction_for[4] -> Val_phi[{{\]\]}} : i1 >= 14; [MemRef_A[0] -> [i1{{\]\]}} -> Undef[] : i1 <= 13 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[0] -> [13{{\]\]}} -> [Stmt_reduction_for[4] -> Val_phi[{{\]\]}} } +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[0] -> [i1{{\]\]}} -> [Stmt_reduction_for[4] -> Val_phi[{{\]\]}} : i1 >= 14; [MemRef_A[0] -> [i1{{\]\]}} -> Undef[] : i1 <= 13 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[0] -> [13{{\]\]}} -> [Stmt_reduction_for[4] -> Val_phi[{{\]\]}} } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0] -> MemRef_phi[] }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0] 
-> MemRef_add[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0] -> MemRef_phi[] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0] -> MemRef_add[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[] -> MemRef_A[0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[] -> MemRef_phi[] }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_embedded.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_embedded.ll @@ -0,0 +1,137 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.for + +outer.for: + %j = phi i32 [0, %entry], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.for, label %outer.exit + + + + reduction.for: + %i = phi i32 [0, %outer.for], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %outer.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + 
+outer.exit: + br label %return + +return: + ret void +} + +; Unrolled flattened schedule: +; [0] Stmt_outer_for[0] +; [1] Stmt_reduction_for[0, 0] +; [2] Stmt_body[0, 0] +; [3] Stmt_reduction_inc[0, 0] +; [4] Stmt_reduction_for[0, 1] +; [5] Stmt_body[0, 1] +; [6] Stmt_reduction_inc[0, 1] +; [7] Stmt_reduction_for[0, 2] +; [8] Stmt_body[0, 2] +; [9] Stmt_reduction_inc[0, 2] +; [10] Stmt_reduction_for[0, 3] +; [11] Stmt_body[0, 3] +; [12] Stmt_reduction_inc[0, 3] +; [13] Stmt_reduction_for[0, 4] +; [14] Stmt_reduction_exit[0] +; [15] Stmt_outer_for[1] +; [16] Stmt_reduction_for[1, 0] +; [17] Stmt_body[1, 0] +; [18] Stmt_reduction_inc[1, 0] +; [19] Stmt_reduction_for[1, 1] +; [20] Stmt_body[1, 1] +; [21] Stmt_reduction_inc[1, 1] +; [22] Stmt_reduction_for[1, 2] +; [23] Stmt_body[1, 2] +; [24] Stmt_reduction_inc[1, 2] +; [25] Stmt_reduction_for[1, 3] +; [26] Stmt_body[1, 3] +; [27] Stmt_reduction_inc[1, 3] +; [28] Stmt_reduction_for[1, 4] +; [29] Stmt_reduction_exit[1] +; [30] Stmt_outer_for[2] + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> [1 + 15i0 + 3i1] } +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> [14 + 15i0] } +; CHECK-NEXT: { Stmt_body[i0, i1] -> [2 + 15i0 + 3i1] } +; CHECK-NEXT: { Stmt_outer_for[i0] -> [15i0] } +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> [3 + 15i0 + 3i1] } +; CHECK-NEXT: } +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 1 and i1 <= 14 + 15i0; [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 and i1 >= 15 + 15i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [14 + 15i0{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 } +; CHECK: Mapped scalars { +; CHECK-NEXT: Scalar access Stmt_reduction_for MK_Value Define MemRef_phi as %phi [new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }]: +; CHECK-NEXT: Accesses: 3 +; CHECK-NEXT: Target: { 
Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 } +; CHECK-NEXT: Lifetime: { Stmt_reduction_for[i0, i1] -> [2 + 15i0 + 3i1] : 0 <= i0 <= 1 and 0 <= i1 <= 3; Stmt_reduction_for[1, 4] -> [29]; Stmt_reduction_for[0, 4] -> [14] } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 15i0 + i1 and 0 <= i0 <= 1 and 2 + 15i0 <= i1 <= 11 + 15i0; [MemRef_A[1] -> [29{{\]\]}} -> [Stmt_reduction_for[1, 4] -> Val_phi[{{\]\]}}; [MemRef_A[0] -> [14{{\]\]}} -> [Stmt_reduction_for[0, 4] -> Val_phi[{{\]\]}} } + Undef +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 15i0 + i1 and 0 <= i0 <= 1 and 15i0 < i1 <= 13 + 15i0 } +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 15i0 + i1 and 0 <= i0 <= 1 and 2 + 15i0 <= i1 <= 11 + 15i0; [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 and i1 >= 15 + 15i0; [MemRef_A[1] -> [29{{\]\]}} -> [Stmt_reduction_for[1, 4] -> Val_phi[{{\]\]}}; [MemRef_A[0] -> [14{{\]\]}} -> [Stmt_reduction_for[0, 4] -> Val_phi[{{\]\]}}; [MemRef_A[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 1 and ((i1 <= 13 and 3*floor((-2 + i1)/3) <= -3 + i1) or (3*floor((-2 + i1)/3) = -2 + i1 and i1 <= 1 + 15i0)); [MemRef_A[1] -> [i1{{\]\]}} -> Undef[] : 14 <= i1 <= 28 and 3*floor((-2 + i1)/3) <= -3 + i1 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 15i0 + i1 and 0 <= i0 <= 1 and 15i0 < i1 <= 13 + 15i0; [MemRef_A[i0] -> [14 + 15i0{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_outer_for +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_for[i0] -> 
MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 8 - 5i0 and i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/tsc.ll =================================================================== --- /dev/null +++ test/DeLICM/tsc.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Benchmarks/TSVC/ControlFlow-flt/tsc.c + +source_filename = "bugpoint-output-6a47ed9.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +%structA = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] } + +@global_data = external global %structA, align 16 + +; Function Attrs: nounwind uwtable +define void @init() { +entry: + br label %for.body28.i943 + +for.body28.i943: ; preds = %for.body28.i943, %entry + %indvars.iv1038 = phi i64 [ %indvars.iv.next1039.9, %for.body28.i943 ], [ 0, %entry ] + %indvars.iv.next1039.3 = add nsw i64 %indvars.iv1038, 4 + %arrayidx30.i941.4 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.3 + store float 1.000000e+00, float* %arrayidx30.i941.4, align 8 + %indvars.iv.next1039.4 = add nsw i64 %indvars.iv1038, 5 + %arrayidx30.i941.5 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.4 + store float 1.000000e+00, float* %arrayidx30.i941.5, align 4 + %indvars.iv.next1039.5 = add nsw i64 %indvars.iv1038, 6 + %arrayidx30.i941.6 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.5 + store float 1.000000e+00, float* %arrayidx30.i941.6, align 8 + %indvars.iv.next1039.6 = add nsw i64 %indvars.iv1038, 7 + %arrayidx30.i941.7 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.6 + store float 1.000000e+00, float* %arrayidx30.i941.7, align 4 + %indvars.iv.next1039.7 = add nsw i64 %indvars.iv1038, 8 + %arrayidx30.i941.8 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.7 + store float 1.000000e+00, float* %arrayidx30.i941.8, align 8 + %indvars.iv.next1039.8 = add nsw i64 %indvars.iv1038, 9 + %arrayidx30.i941.9 = getelementptr 
inbounds %structA, %structA* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.8 + + store float 1.000000e+00, float* %arrayidx30.i941.9, align 4 + %indvars.iv.next1039.9 = add nsw i64 %indvars.iv1038, 10 + %exitcond1040.9 = icmp eq i64 %indvars.iv.next1039.9, 32000 + br i1 %exitcond1040.9, label %for.body.i935, label %for.body28.i943 + +for.body.i935: ; preds = %for.body28.i943 + %arrayidx.i934 = getelementptr inbounds %structA, %structA* @global_data, i64 0, i32 3, i64 0 + store float undef, float* %arrayidx.i934, align 8 + store float -1.000000e+00, float* getelementptr inbounds (%structA, %structA* @global_data, i64 0, i32 0, i64 31999), align 4 + br label %if.end1110 + +if.end1110: ; preds = %for.body.i935 + ret void +} + + +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Val_1_000000e_00[] : i1 >= 2; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Val_1_000000e_00[] : i2 > 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and i1 < 0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[32004] -> [i1, i2{{\]\]}} -> Undef[]; [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Undef[] : i1 <= 0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and 10i2 <= i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Undef[] : i2 <= 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and i1 > 0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and 10i2 > i0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i2 <= 3199 and -9 + i0 <= 10i2 <= -4 + i0; [MemRef_global_data[31999] -> [1, 0{{\]\]}} -> Val_1_000000e_00[]; [MemRef_global_data[32004] -> [1, 0{{\]\]}} -> Undef[] } +; CHECK: 
Mapped scalars { +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Val_1_000000e_00[] : i1 >= 2; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Val_1_000000e_00[] : i2 > 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and i1 < 0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[32004] -> [i1, i2{{\]\]}} -> Undef[]; [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Undef[] : i1 <= 0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and 10i2 <= i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Undef[] : i2 <= 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and i1 > 0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and 10i2 > i0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i2 <= 3199 and -9 + i0 <= 10i2 <= -4 + i0; [MemRef_global_data[31999] -> [1, 0{{\]\]}} -> Val_1_000000e_00[]; [MemRef_global_data[32004] -> [1, 0{{\]\]}} -> Undef[] } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_for_body28_i943 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[4 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[5 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[6 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[7 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; 
CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[8 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body28_i943[i0] -> MemRef_global_data[9 + 10i0] }; +; CHECK-NEXT: Stmt_for_body_i935 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_i935[] -> MemRef_global_data[32004] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_i935[] -> MemRef_global_data[31999] }; +; CHECK-NEXT: } Index: test/DeLICM/tsc0.ll =================================================================== --- /dev/null +++ test/DeLICM/tsc0.ll @@ -0,0 +1,87 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Benchmarks/TSVC/ControlFlow-dbl/tsc.c + +; ModuleID = '/tmp/bugpoint-ksc80en4/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-7eb0c3b.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + 
+%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151 = type { [32000 x double], [3 x i32], [20 x i8], [32000 x double], [5 x i32], [12 x i8], [32000 x double], [7 x i32], [32000 x double], [11 x i32], [20 x i8], [32000 x double], [13 x i32], [12 x i8], [256 x [256 x double]], [17 x i32], [28 x i8], [256 x [256 x double]], [19 x i32], [20 x i8], [256 x [256 x double]], [23 x i32], [256 x [256 x double]] 
} + +@global_data = external global %struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, align 32 + +; Function Attrs: nounwind uwtable +define void @init() local_unnamed_addr #0 { +entry: + br label %for.body24.i944 + +for.body24.i944: ; preds = %for.body24.i944, %entry + %indvars.iv1038 = phi i64 [ %indvars.iv.next1039.9, %for.body24.i944 ], [ 0, %entry ] + %indvars.iv.next1039.3 = add nsw i64 
%indvars.iv1038, 4 + %arrayidx26.i942.4 = getelementptr inbounds %struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.3 + store double 1.000000e+00, double* %arrayidx26.i942.4, align 16 + %indvars.iv.next1039.4 = add nsw i64 %indvars.iv1038, 5 + %arrayidx26.i942.5 = getelementptr inbounds 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.4 + store double 1.000000e+00, double* %arrayidx26.i942.5, align 8 + %indvars.iv.next1039.5 = add nsw i64 %indvars.iv1038, 6 + %arrayidx26.i942.6 = getelementptr inbounds 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.5 + store double 1.000000e+00, double* %arrayidx26.i942.6, align 16 + %indvars.iv.next1039.6 = add nsw i64 %indvars.iv1038, 7 + %arrayidx26.i942.7 = getelementptr inbounds 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.6 + store double 1.000000e+00, double* %arrayidx26.i942.7, align 8 + %indvars.iv.next1039.7 = add nsw i64 %indvars.iv1038, 8 + %arrayidx26.i942.8 = getelementptr inbounds 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.7 + store double 1.000000e+00, double* %arrayidx26.i942.8, align 16 + %indvars.iv.next1039.8 = add nsw i64 %indvars.iv1038, 9 + %arrayidx26.i942.9 = getelementptr inbounds 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 %indvars.iv.next1039.8 + store double 1.000000e+00, double* %arrayidx26.i942.9, align 8 + %indvars.iv.next1039.9 = add nsw i64 %indvars.iv1038, 10 + %exitcond1040.9 = icmp eq i64 %indvars.iv.next1039.9, 32000 + br i1 %exitcond1040.9, label %for.body.i936, label %for.body24.i944 + +for.body.i936: ; preds = 
%for.body24.i944 + %arrayidx.i935 = getelementptr inbounds %struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 3, i64 0 + store double undef, double* %arrayidx.i935, align 16 + store double -1.000000e+00, double* getelementptr inbounds 
(%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151, 
%struct.GlobalData.0.9.20.29.42.59.74.83.94.103.116.133.150.159.172.189.208.209.210.211.212.213.214.215.217.218.221.222.223.224.225.226.227.228.230.231.232.233.234.235.237.238.239.240.241.242.243.244.245.247.248.249.250.252.253.254.255.256.257.260.261.263.264.265.266.267.269.270.271.272.273.274.280.281.282.283.284.285.286.287.289.290.292.293.294.295.296.297.298.299.300.301.305.307.308.310.311.312.313.315.316.317.318.320.323.324.326.327.328.329.331.332.333.334.335.337.338.339.340.341.343.344.346.347.348.349.351.353.354.355.356.357.360.363.364.366.367.368.369.370.371.373.374.375.376.377.378.379.380.381.382.384.385.386.387.389.390.392.393.394.395.396.397.398.399.402.403.404.405.406.407.408.411.413.414.415.416.418.421.423.424.427.428.429.431.432.433.434.435.439.440.441.442.443.444.445.446.447.448.449.452.453.454.455.457.459.460.463.464.465.466.468.472.473.475.476.477.478.485.488.489.494.495.499.500.503.505.507.508.510.511.513.515.516.517.520.522.523.524.525.527.530.531.532.533.536.537.540.542.543.544.545.546.547.548.549.550.551.552.554.555.557.558.559.560.562.563.564.565.568.569.570.573.574.575.576.582.583.584.586.587.590.591.593.594.596.597.601.605.607.608.609.613.614.615.616.619.622.623.626.628.631.632.633.636.637.638.639.642.643.644.647.654.655.656.659.662.663.669.671.674.677.679.681.682.683.684.686.690.691.693.698.702.703.704.705.706.710.718.723.729.732.736.738.739.740.742.743.744.747.751.754.762.763.765.768.770.776.779.784.792.796.805.809.813.815.825.828.829.840.841.859.860.867.881.883.886.887.891.906.915.928.932.934.935.954.962.971.972.979.981.982.992.1008.1012.1033.1084.1086.1087.1089.1091.1092.1093.1094.1095.1151* @global_data, i64 0, i32 0, i64 31999), align 8 + br label %if.end1110 + +if.end1110: ; preds = %for.body.i936 + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 278052) (llvm/trunk 278053)"} + + +; CHECK: Original zone: +; CHECK-NEXT: Lifetime: { [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Val_1_000000e_00[] : i1 >= 2; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Val_1_000000e_00[] : i2 > 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and i1 < 0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[32004] -> [i1, i2{{\]\]}} -> Undef[]; [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Undef[] : i1 <= 0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and 10i2 <= i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Undef[] : i2 <= 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and i1 > 0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and 10i2 > i0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i2 <= 3199 and -9 + i0 <= 10i2 <= -4 + i0; [MemRef_global_data[31999] -> [1, 0{{\]\]}} -> Val_1_000000e_00[]; [MemRef_global_data[32004] -> [1, 0{{\]\]}} -> Undef[] } +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; CHECK: After zone: +; CHECK-NEXT: Lifetime: { [MemRef_global_data[31999] -> [i1, i2{{\]\]}} -> Val_1_000000e_00[] : i1 >= 2; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Val_1_000000e_00[] : i2 > 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and i1 < 0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[32004] -> [i1, i2{{\]\]}} -> Undef[]; [MemRef_global_data[31999] -> [i1, i2{{\]\]}} 
-> Undef[] : i1 <= 0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Undef[] : 0 <= i0 <= 31999 and 10i2 <= i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[31999] -> [1, i2{{\]\]}} -> Undef[] : i2 <= 0; [MemRef_global_data[i0] -> [i1, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and i1 > 0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0; [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 31999 and 10i2 > i0 and 9*floor((i0)/10) >= -3207 + i0 and 10*floor((i0)/10) <= -4 + i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_global_data[i0] -> [0, i2{{\]\]}} -> Val__000000e_00[] : 0 <= i2 <= 3199 and -9 + i0 <= 10i2 <= -4 + i0; [MemRef_global_data[31999] -> [1, 0{{\]\]}} -> Val_1_000000e_00[]; [MemRef_global_data[32004] -> [1, 0{{\]\]}} -> Undef[] } +; CHECK: After Statements { +; CHECK-NEXT: Stmt_for_body24_i944 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[4 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[5 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[6 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[7 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[8 + 10i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body24_i944[i0] -> MemRef_global_data[9 + 10i0] }; +; CHECK-NEXT: Stmt_for_body_i936 +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_i936[] -> MemRef_global_data[32004] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; 
CHECK-NEXT: { Stmt_for_body_i936[] -> MemRef_global_data[31999] }; +; CHECK-NEXT: } Index: test/ScopInfo/NonAffine/non_affine_loop_used_later.ll =================================================================== --- test/ScopInfo/NonAffine/non_affine_loop_used_later.ll +++ test/ScopInfo/NonAffine/non_affine_loop_used_later.ll @@ -86,10 +86,6 @@ ; CHECK-NEXT: [N] -> { Stmt_bb23[i0] -> MemRef_j_0__phi[] }; ; CHECK-NEXT: } ; -; Due to the scalar accesses we are not able to distribute the outer loop, thus we do not consider the region profitable. -; -; PROFIT-NOT: Statements -; ; void f(int *A, int N, int M) { ; int i = 0, j = 0; ; for (i = 0; i < N; i++) { Index: test/ScopInfo/licm_load.ll =================================================================== --- test/ScopInfo/licm_load.ll +++ /dev/null @@ -1,54 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare \ -; RUN: -polly-invariant-load-hoisting=true -polly-scops -analyze < %s \ -; RUN: | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare \ -; RUN: -polly-invariant-load-hoisting=true -polly-scops -analyze < %s \ -; RUN: | FileCheck %s -; -; void foo(int n, float A[static const restrict n], -; float B[static const restrict n], int j) { -; for (int i = 0; i < n; i++) -; A[i] = B[j]; -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32 %n, float* noalias nonnull %A, float* noalias nonnull %B, i32 %j) { -entry: - %tmp = sext i32 %n to i64 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %cmp = icmp slt i64 %indvars.iv, %tmp - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %idxprom = sext i32 %j to i64 - %arrayidx = getelementptr inbounds float, float* %B, i64 %idxprom - %tmp1 = bitcast float* %arrayidx to i32* - %tmp2 = load i32, i32* %tmp1, align 4 - %arrayidx2 = getelementptr inbounds float, float* 
%A, i64 %indvars.iv - %tmp3 = bitcast float* %arrayidx2 to i32* - store i32 %tmp2, i32* %tmp3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; CHECK: Invariant Accesses: { -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_{{[a-zA-Z_]*}}[{{[i0]*}}] -> MemRef_B[j] }; -; CHECK-NEXT: Execution Context: [n, j] -> { : n > 0 } -; CHECK-NEXT: } -; -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_A[i0] }; -; CHECK: } Index: test/ScopInfo/licm_potential_store.ll =================================================================== --- test/ScopInfo/licm_potential_store.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ -; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars \ -; RUN: -polly-prepare -polly-scops -analyze < %s \ -; RUN: \ -; RUN: | FileCheck %s --check-prefix=NOLICM - -; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ -; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars -licm \ -; RUN: -polly-prepare -polly-scops -analyze < %s \ -; RUN: \ -; RUN: | FileCheck %s --check-prefix=LICM - -; void foo(int n, float A[static const restrict n], float x) { -; // (0) -; for (int i = 0; i < 5; i += 1) { -; for (int j = 0; j < n; j += 1) { -; x = 7; // (1) -; } -; A[0] = x; // (3) -; } -; // (4) -; } - -; LICM: Statements -; NOLICM: Statements - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32 %n, float* noalias nonnull %A, float %x) { -entry: - %n.addr = alloca i32, align 4 - %A.addr = alloca float*, align 8 - %x.addr = alloca float, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store i32 %n, i32* %n.addr, 
align 4 - store float* %A, float** %A.addr, align 8 - store float %x, float* %x.addr, align 4 - %tmp = load i32, i32* %n.addr, align 4 - %tmp1 = zext i32 %tmp to i64 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc.4, %entry - %tmp2 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %tmp2, 5 - br i1 %cmp, label %for.body, label %for.end.6 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond.1 - -for.cond.1: ; preds = %for.inc, %for.body - %tmp3 = load i32, i32* %j, align 4 - %tmp4 = load i32, i32* %n.addr, align 4 - %cmp2 = icmp slt i32 %tmp3, %tmp4 - br i1 %cmp2, label %for.body.3, label %for.end - -for.body.3: ; preds = %for.cond.1 - store float 7.000000e+00, float* %x.addr, align 4 - br label %for.inc - -for.inc: ; preds = %for.body.3 - %tmp5 = load i32, i32* %j, align 4 - %add = add nsw i32 %tmp5, 1 - store i32 %add, i32* %j, align 4 - br label %for.cond.1 - -for.end: ; preds = %for.cond.1 - %tmp6 = load float, float* %x.addr, align 4 - %tmp7 = load float*, float** %A.addr, align 8 - %arrayidx = getelementptr inbounds float, float* %tmp7, i64 0 - store float %tmp6, float* %arrayidx, align 4 - br label %for.inc.4 - -for.inc.4: ; preds = %for.end - %tmp8 = load i32, i32* %i, align 4 - %add5 = add nsw i32 %tmp8, 1 - store i32 %add5, i32* %i, align 4 - br label %for.cond - -for.end.6: ; preds = %for.cond - ret void -} - -; CHECK: Statements { -; CHECK: Stmt_for_end -; CHECK: } Index: test/ScopInfo/licm_reduction.ll =================================================================== --- test/ScopInfo/licm_reduction.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; -; XFAIL: * -; -; void test(int n, double B[static const restrict n], int j) { -; for (int i = 0; i < n; i += 1) { -; B[j] += 
i; -; } -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @test(i32 %n, double* noalias nonnull %B, i32 %j) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i32 [ 0, %entry ], [ %add1, %for.inc ] - %cmp = icmp slt i32 %i.0, %n - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %conv = sitofp i32 %i.0 to double - %idxprom = sext i32 %j to i64 - %arrayidx = getelementptr inbounds double, double* %B, i64 %idxprom - %tmp = load double, double* %arrayidx, align 8 - %add = fadd double %tmp, %conv - store double %add, double* %arrayidx, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %add1 = add nuw nsw i32 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - - -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_B[j] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_B[j] }; -; CHECK: } Index: test/ScopInfo/licm_store.ll =================================================================== --- test/ScopInfo/licm_store.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; -; XFAIL: * -; -; void foo(float *restrict A, float *restrict B, long j) { -; for (long i = 0; i < 100; i++) -; A[j] = B[i]; -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(float* noalias %A, float* noalias %B, i64 %j) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond = icmp ne i64 %i.0, 100 - br i1 %exitcond, label %for.body, label %for.end - -for.body: ; 
preds = %for.cond - %arrayidx = getelementptr inbounds float, float* %B, i64 %i.0 - %tmp = bitcast float* %arrayidx to i32* - %tmp1 = load i32, i32* %tmp, align 4 - %arrayidx1 = getelementptr inbounds float, float* %A, i64 %j - %tmp2 = bitcast float* %arrayidx1 to i32* - store i32 %tmp1, i32* %tmp2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add nuw nsw i64 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_B[i0] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_A[j] }; -; CHECK: } Index: test/compute.ll =================================================================== --- /dev/null +++ test/compute.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Benchmarks/Olden/power/compute.c + +; ModuleID = '/tmp/bugpoint-ms_vln77/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-a8d50b5.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @make_orthogonal(double* %v_mod) #0 { +entry: + br label %for.body + +for.cond3.preheader: ; preds = %for.body + br label %for.body5 + +for.body: ; preds = %for.body, %entry + %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.body ], [ 0, %entry ] + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + br i1 false, label %for.body, label %for.cond3.preheader + +for.body5: ; preds = %for.body5, %for.cond3.preheader + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.body5 ], [ 0, %for.cond3.preheader ] + %length.03 = phi double [ 0.000000e+00, %for.cond3.preheader ], [ undef, 
%for.body5 ] + %arrayidx10 = getelementptr inbounds double, double* %v_mod, i64 %indvars.iv1 + store double undef, double* %arrayidx10, align 8 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + %exitcond3 = icmp ne i64 %indvars.iv.next2, 2 + br i1 %exitcond3, label %for.body5, label %for.end19 + +for.end19: ; preds = %for.body5 + unreachable +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/correlation.ll =================================================================== --- /dev/null +++ test/correlation.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Polybench/datamining/correlation/correlation.c + +; ModuleID = '/tmp/bugpoint-xo0jvlru/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-62fa5d1.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @kernel_correlation(i32 %m, i32 %n, double* %mean) #0 { +entry: + br label %for.body + +for.cond.for.cond15.preheader_crit_edge: ; preds = %for.end + unreachable + +for.body: ; preds = %for.end, %entry + %indvars.iv31 = phi i64 [ %indvars.iv.next32, %for.end ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double, double* %mean, i64 %indvars.iv31 + br label %for.body3 + +for.body3: ; preds = 
%for.body3, %for.body + %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body3 ], [ 0, %for.body ] + %0 = phi double [ 0.000000e+00, %for.body ], [ undef, %for.body3 ] + store double undef, double* %arrayidx, align 8 + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1 + %wide.trip.count29 = zext i32 %n to i64 + %exitcond30 = icmp ne i64 %indvars.iv.next28, %wide.trip.count29 + br i1 %exitcond30, label %for.body3, label %for.cond1.for.end_crit_edge + +for.cond1.for.end_crit_edge: ; preds = %for.body3 + br label %for.end + +for.end: ; preds = %for.cond1.for.end_crit_edge + %indvars.iv.next32 = add nuw nsw i64 %indvars.iv31, 1 + %wide.trip.count33 = zext i32 %m to i64 + %exitcond34 = icmp ne i64 %indvars.iv.next32, %wide.trip.count33 + br i1 %exitcond34, label %for.body, label %for.cond.for.cond15.preheader_crit_edge +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/fasta.ll =================================================================== --- /dev/null +++ test/fasta.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/Large/fasta.c + +; ModuleID = '/tmp/bugpoint-rup_b6vx/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-59d46bf.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + 
+%struct.aminoacid_t.0.3.6.15.30.36.42.54.105.126 = type { float, i8 } + +; Function Attrs: inlinehint nounwind uwtable +define void @accumulate_probabilities(%struct.aminoacid_t.0.3.6.15.30.36.42.54.105.126* %genelist, i64 %len) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br i1 false, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry.split + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %cp.02 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ undef, %for.body ] + %p = getelementptr inbounds %struct.aminoacid_t.0.3.6.15.30.36.42.54.105.126, %struct.aminoacid_t.0.3.6.15.30.36.42.54.105.126* %genelist, i64 %i.03, i32 0 + store float undef, float* %p, align 4 + %inc = add nuw i64 %i.03, 1 + %exitcond = icmp ne i64 %inc, %len + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + ret void +} + +attributes #0 = { inlinehint nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/gcc-loops.ll =================================================================== --- /dev/null +++ test/gcc-loops.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from 
test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/UnitTests/Vectorizer/gcc-loops.cpp + +; ModuleID = '/tmp/bugpoint-kr6hts1h/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-b1774e2.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @_Z17init_memory_floatPfS_(float* %start, float* %end) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br i1 false, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry.split + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %state.03 = phi float [ 1.000000e+00, %while.body.lr.ph ], [ undef, %while.body ] + %start.addr.02 = phi float* [ %start, %while.body.lr.ph ], [ %incdec.ptr, %while.body ] + store float undef, float* %start.addr.02, align 4 + %incdec.ptr = getelementptr inbounds float, float* %start.addr.02, i64 1 + %cmp = icmp eq float* %incdec.ptr, %end + br i1 %cmp, label %while.cond.while.end_crit_edge, label %while.body + +while.cond.while.end_crit_edge: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry.split + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/libclamav_autoit.ll =================================================================== --- /dev/null +++ test/libclamav_autoit.ll @@ -0,0 
+1,49 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/ClamAV/libclamav_autoit.c + +; ModuleID = '/tmp/bugpoint-ja1nfpyv/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-35be842.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596 = type { [624 x i32], i32, i32* } + +; Function Attrs: nounwind uwtable +define void @MT_getnext(%struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596* %MT) #0 { +entry: + br label %for.body + +for.cond20.preheader: ; preds = %for.body + %0 = trunc i64 227 to i32 + %1 = zext i32 %0 to i64 + br label %for.body22 + +for.body: ; preds = %for.body, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.body ], [ 0, %entry ] + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br i1 false, label %for.body, label %for.cond20.preheader + +for.body22: ; preds = %for.body22, %for.cond20.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body22 ], [ %1, %for.cond20.preheader ] + %2 = phi i32 [ undef, %for.cond20.preheader ], [ %5, %for.body22 ] + %arrayidx24 = getelementptr inbounds %struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596, %struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596* %MT, i64 0, i32 0, i64 %indvars.iv + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + store i32 undef, i32* %arrayidx24, align 4 + %3 = trunc i64 %indvars.iv to i32 + %sub41 = add i32 %3, -226 + %idxprom42 = zext i32 %sub41 to i64 + %4 = getelementptr inbounds %struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596, %struct.MT.7.17.27.54.81.126.135.144.153.180.207.279.288.297.306.495.596* %MT, i64 0, i32 0, i64 %idxprom42 + %5 = load i32, i32* %4, align 4 + %exitcond = 
icmp ne i64 %indvars.iv.next, 623 + br i1 %exitcond, label %for.body22, label %for.cond20.for.end49_crit_edge + +for.cond20.for.end49_crit_edge: ; preds = %for.body22 + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/oggenc.ll =================================================================== --- /dev/null +++ test/oggenc.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/MultiSource/Applications/oggenc/oggenc.c + +; ModuleID = '/tmp/bugpoint-m4umgwu_/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-7ffe896.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @local_book_besterror() #0 { +entry: + %0 = load i64, i64* undef, align 8 + %conv = trunc i64 %0 to i32 + br label %for.body64 + +for.body64: ; preds = %if.end92, %entry + %bestf.011 = phi float [ 0.000000e+00, %entry ], [ %this.0.bestf.0, %if.end92 ] + br label %for.body74 + +for.body74: ; preds = %for.body74, %for.body64 + br i1 false, label %for.body74, label %for.cond71.for.end85_crit_edge + +for.cond71.for.end85_crit_edge: ; preds = %for.body74 + %cmp88 = fcmp olt float undef, %bestf.011 + %this.0.bestf.0 = select i1 undef, float undef, float %bestf.011 + br label %if.end92 + +if.end92: ; preds = 
%for.cond71.for.end85_crit_edge + br i1 undef, label %for.body64, label %for.cond60.if.end96.loopexit_crit_edge + +for.cond60.if.end96.loopexit_crit_edge: ; preds = %if.end92 + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/oopack_v1p8.ll =================================================================== --- /dev/null +++ test/oopack_v1p8.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Misc-C++/oopack_v1p8.cpp + +; ModuleID = '/tmp/bugpoint-lfqv27dw/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-327cbd4.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@E = external global [2500 x double], align 16 + +; Function Attrs: nounwind uwtable +define void @_ZNK15MatrixBenchmark7c_styleEv() unnamed_addr #0 align 2 { +entry: + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.end, %entry + %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.end ], [ 0, %entry ] + br label %for.body7 + +for.body7: ; preds = %for.body7, %for.cond5.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.cond5.preheader ] + %sum.01 = phi double [ 0.000000e+00, %for.cond5.preheader ], [ undef, %for.body7 ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp 
ne i64 %indvars.iv.next, 50 + br i1 %exitcond, label %for.body7, label %for.end + +for.end: ; preds = %for.body7 + %add13.lcssa = phi double [ undef, %for.body7 ] + %0 = add nuw nsw i64 %indvars.iv4, 0 + %arrayidx17 = getelementptr inbounds [2500 x double], [2500 x double]* @E, i64 0, i64 %0 + store double %add13.lcssa, double* %arrayidx17, align 8 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond7 = icmp ne i64 %indvars.iv.next5, 50 + br i1 %exitcond7, label %for.cond5.preheader, label %for.inc21 + +for.inc21: ; preds = %for.end + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/oourafft.ll =================================================================== --- /dev/null +++ test/oourafft.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Misc/oourafft.c + +; ModuleID = '/tmp/bugpoint-dw4h9zhh/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-7b19655.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @putdata(i32 %nend, double* %a) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br i1 false, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry.split + %0 = sext i32 %nend to i64 + br 
label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.lr.ph ] + %seed.03 = phi i32 [ 0, %for.body.lr.ph ], [ %rem, %for.body ] + %rem = srem i32 undef, 259200 + %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double undef, double* %arrayidx, align 8 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %cmp = icmp slt i64 %indvars.iv, %0 + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/stepanov_container.ll =================================================================== --- /dev/null +++ test/stepanov_container.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -basicaa -scoped-noalias -tbaa -polly-delicm -analyze < %s + +; Derived from test-suite/../../../../../mnt/c/Users/Meinersbur/src/llvm/projects/test-suite/SingleSource/Benchmarks/Misc-C++/stepanov_container.cpp + +; ModuleID = '/tmp/bugpoint-h9oaz7fr/bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-f608da4.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @_Z10initializePdS_(double* %first, double* %last) #0 { +entry: + br label %entry.split + 
+entry.split: ; preds = %entry + br i1 false, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry.split + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %value.03 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ undef, %while.body ] + %first.addr.02 = phi double* [ %first, %while.body.lr.ph ], [ %incdec.ptr, %while.body ] + %incdec.ptr = getelementptr inbounds double, double* %first.addr.02, i64 1 + store double %value.03, double* %first.addr.02, align 8 + %cmp = icmp eq double* %incdec.ptr, %last + br i1 %cmp, label %while.cond.while.end_crit_edge, label %while.body + +while.cond.while.end_crit_edge: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry.split + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 283725) (llvm/trunk 283723)"} Index: test/update_check.py =================================================================== --- test/update_check.py +++ test/update_check.py @@ -9,6 +9,7 @@ import subprocess import shlex import re +import shutil polly_src_dir = '''@POLLY_SOURCE_DIR@''' @@ -18,9 +19,10 @@ link_polly_into_tools = not '''@LINK_POLLY_INTO_TOOLS@'''.lower() in {'','0','n','no','off','false','notfound','link_polly_into_tools-notfound'} runre = re.compile(r'\s*\;\s*RUN\s*\:(?P.*)') -filecheckre = re.compile(r'\s*(?P.*)\|\s*(?PFileCheck\s[^|]*)') +filecheckre = re.compile(r'\s*(?P.*)\|\s*(?PFileCheck[^|]*)') emptyline = re.compile(r'\s*(\;\s*)?') 
commentline = re.compile(r'\s*(\;.*)?') +xfailre = re.compile(r'^\s*\;\s*XFAIL\:\s*\*\s*$') def ltrim_emptylines(lines,meta=None): @@ -182,6 +184,44 @@ if line == '}': break line = i.__next__() + elif line.startswith( 'Known zone:'): + yield {'KnownZone'} + elif line == 'Mapped knowns {': + while True: + yield {'MappedKnowns'} + if line == '}': + break + line = i.__next__() + elif line.startswith( 'Original zone:'): + yield {'OriginalZone'} + line = i.__next__() + yield {'OriginalZone','OriginalZoneLifetime','Lifetime'} + line = i.__next__() + yield {'OriginalZone','OriginalZoneWritten','Written'} + elif line.startswith( 'After zone:'): + yield {'AfterZone'} + line = i.__next__() + yield {'AfterZone','AfterZoneLifetime','Lifetime'} + line = i.__next__() + yield {'AfterZone','AfterZoneWritten','Written'} + elif line == 'Mapped scalars {': + while True: + yield {'MappedScalars'} + if line == '}': + break + line = i.__next__() + elif line == 'Cleanups {': + while True: + yield {'Cleanups'} + if line == '}': + break + line = i.__next__() + elif line == 'After Statements {': + while True: + yield {'AfterStatements'} + if line == '}': + break + line = i.__next__() else: yield set() line = i.__next__() @@ -224,6 +264,7 @@ parser.add_argument('--testdir',help="Root dir for unit tests") parser.add_argument('--inplace','-i',action='store_true',help="Replace input file") parser.add_argument('--output','-o',help="Write changed input to this file") + parser.add_argument('--autorule',action='store_true',help="Select rules derived from file path") known = parser.parse_args() if not known.inplace and known.output is None: @@ -235,8 +276,6 @@ outfile = known.output - filecheckparser = argparse.ArgumentParser(add_help=False) - filecheckparser.add_argument('-check-prefix','--check-prefix',default='CHECK') filename = known.testfile for dir in ['.', known.testdir, os.path.join(polly_src_dir,'test'), polly_src_dir]: @@ -250,6 +289,50 @@ if known.inplace: outfile = filename + if 
known.autorule: + update_autorule(filename,outfile=outfile,known=known) + else: + update_check(filename,outfile=outfile,known=known,CheckInclude=set(known.check_include),CheckLabelInclude=set(known.check_label_include)) + + +def update_autorule(filename,outfile,known): + rempath,filepart = os.path.split(filename) + cat = (filepart,) + while True: + rempath,tail = os.path.split(rempath) + if tail == 'test' or len(tail)==0: + break + cat = (tail,) + cat + + success=True + if len(tail)==0: + success=False + elif cat[:-1] == ('DeLICM',): + update_check_rule(filename, outfile=outfile, known=known, CheckInclude={'ScheduleAfterFlattening','OriginalZone','MappedScalars','Cleanups','AfterZone','AfterStatements'}) + elif cat[:-1] == ('Known',): + update_check_rule(filename, outfile=outfile, known=known, CheckInclude={'ScheduleAfterFlattening','KnownZone','MappedKnowns','AfterStatements'}) + else: + success=False + + if not success: + # No rule for this file, don't change it + if outfile!=filename: + shutil.copy(filename,outfile) + + +def update_check_rule(filename,outfile,known,CheckInclude=set(),CheckLabelInclude=set()): + update_check(filename,outfile,known,CheckInclude=set(known.check_include)|CheckInclude,CheckLabelInclude=set(known.check_label_include)|CheckLabelInclude) + +def unmodified(outfile,oldlines): + with open(outfile, 'w', newline='') as file: + for line in oldlines: + file.write(line) + file.write('\n') + +def update_check(filename,outfile,known,CheckInclude,CheckLabelInclude): + filecheckparser = argparse.ArgumentParser(add_help=False) + filecheckparser.add_argument('-check-prefix','--check-prefix',default='CHECK') + allchecklines = [] checkprefixes = [] @@ -258,6 +341,8 @@ runlines = [] for line in oldlines: + if xfailre.match(line): + return unmodified(outfile,oldlines) m = runre.match(line) if m: runlines.append(m.group('tool')) @@ -323,7 +408,7 @@ retlines = subprocess.check_output(tool,universal_newlines=True,stderr=optstderr) retlines = 
[line.replace('\t', ' ') for line in retlines.splitlines()] check_include = [] - for checkme in known.check_include + known.check_label_include: + for checkme in CheckInclude|CheckLabelInclude: parts = checkme.split('=') if len(parts) == 2: if parts[0] == check_prefix: @@ -358,15 +443,15 @@ previous_was_empty = True for line,kind in zip(retlines,classified_retlines): if line: - if known.check_style == 'CHECK' and known.check_label_include: - if not kind.isdisjoint(known.check_label_include): + if known.check_style == 'CHECK' and CheckLabelInclude: + if not kind.isdisjoint(CheckLabelInclude): checklines.append('; ' + check_prefix + '-LABEL: ' + line) else: checklines.append('; ' + check_prefix + ': ' + line) elif known.check_style == 'CHECK': checklines.append('; ' + check_prefix + ': ' + line) - elif known.check_label_include and known.check_label_include: - if not kind.isdisjoint(known.check_label_include): + elif CheckLabelInclude: + if not kind.isdisjoint(CheckLabelInclude): checklines.append('; ' + check_prefix + '-LABEL: ' + line) elif previous_was_empty: checklines.append('; ' + check_prefix + ': ' + line) @@ -385,7 +470,7 @@ allchecklines.append(checklines) if not checkprefixes: - return + return unmodified(outfile,oldlines) checkre = re.compile(r'^\s*\;\s*(' + '|'.join([re.escape(s) for s in checkprefixes]) + ')(\-NEXT|\-DAG|\-NOT|\-LABEL|\-SAME)?\s*\:') firstcheckline = None Index: unittests/CMakeLists.txt =================================================================== --- unittests/CMakeLists.txt +++ unittests/CMakeLists.txt @@ -16,8 +16,9 @@ set_property(TARGET ${test_name} PROPERTY FOLDER "Polly") endif() - target_link_libraries(${test_name} Polly LLVMCore LLVMSupport) + target_link_libraries(${test_name} Polly LLVMCore LLVMSupport LLVMScalarOpts LLVMipo) endfunction() add_subdirectory(Isl) add_subdirectory(Flatten) +add_subdirectory(DeLICM) Index: unittests/DeLICM/CMakeLists.txt =================================================================== 
--- /dev/null +++ unittests/DeLICM/CMakeLists.txt @@ -0,0 +1,3 @@ +add_polly_unittest(DeLICMTests + DeLICMTest.cpp + ) Index: unittests/DeLICM/DeLICMTest.cpp =================================================================== --- /dev/null +++ unittests/DeLICM/DeLICMTest.cpp @@ -0,0 +1,620 @@ +//===- DeLICMTest.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "polly/DeLICM.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace polly; + +#define indef \ + (-1) /* For test cases where the true/false value should have no effect on \ + the outcome. */ + +#define BOOL_FOR(VAR) \ + for (int VAR##_iter = (VAR == indef) ? 0 : VAR; \ + VAR##_iter <= ((VAR == indef) ? 
1 : VAR); VAR##_iter += 1) \ + for (bool VAR = VAR##_iter, VAR##_repeat = true; VAR##_repeat; \ + VAR##_repeat = false) + +namespace { + +void computeReachingDefinitionZone_check(const char *ScheduleStr, + const char *DefsStr, int IncludeDef, + int IncludeRedef, + const char *ExpectedStr) { + BOOL_FOR(IncludeDef) + BOOL_FOR(IncludeRedef) { + auto *Ctx = isl_ctx_alloc(); + + { + auto Schedule = give(isl_union_map_read_from_str(Ctx, ScheduleStr)); + auto Defs = give(isl_union_map_read_from_str(Ctx, DefsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx, ExpectedStr)); + + auto Result = + computeReachingDefinition(Schedule, Defs, IncludeDef, IncludeRedef); + + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + assert(isl_bool_true == Success); + EXPECT_EQ(isl_bool_true, Success); + } + + isl_ctx_free(Ctx); + } +} + +TEST(DeLICM, ReachingDefinitionZone) { + EXPECT_FALSE(computeReachingDefinition(nullptr, nullptr, false, false)); + + computeReachingDefinitionZone_check("{ Dom[] -> [0] }", "{ Dom[] -> Elt[] }", + true, indef, + "{ [Elt[] -> [i]] -> Dom[] : 0 <= i }"); + computeReachingDefinitionZone_check("{ Dom[] -> [0] }", "{ Dom[] -> Elt[] }", + false, indef, + "{ [Elt[] -> [i]] -> Dom[] : 0 < i }"); + + computeReachingDefinitionZone_check( + "{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", true, false, + "{ [Elt[] -> [i]] -> Dom1[] : 0 <= i < 10; [Elt[] -> [i]] -> Dom2[] : 10 " + "<= i }"); + computeReachingDefinitionZone_check( + "{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", false, false, + "{ [Elt[] -> [i]] -> Dom1[] : 0 < i < 10; [Elt[] -> [i]] -> Dom2[] : 10 " + "< i }"); + computeReachingDefinitionZone_check("{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", + false, true, "{ [Elt[] -> [i]] -> Dom1[] " + ": 0 < i <= 10; [Elt[] -> " + "[i]] -> Dom2[] : 10 < i }"); + computeReachingDefinitionZone_check("{ Dom1[] -> [0]; Dom2[] -> [10] }", + 
"{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", + true, true, "{ [Elt[] -> [i]] -> Dom1[] " + ": 0 <= i <= 10; [Elt[] -> " + "[i]] -> Dom2[] : 10 <= i }"); + + computeReachingDefinitionZone_check("{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt1[]; Dom2[] -> Elt2[] }", + true, indef, + "{ [Elt1[] -> [i]] -> Dom1[] : 0 <= i; " + "[Elt2[] -> [i]] -> Dom2[] : 10 <= i }"); + + computeReachingDefinitionZone_check( + "{ Dom[i] -> [i] }", "{ Dom[i] -> Elt[]; Dom2[] -> Elt[] }", true, false, + "{ [Elt[] -> [i]] -> Dom[i] }"); + + computeReachingDefinitionZone_check("{ Dom[1] -> [0]; Dom[2] -> [10] }", + "{ Dom[1] -> Elt[]; Dom[2] -> Elt[] }", + false, true, "{ [Elt[] -> [i]] -> Dom[1] " + ": 0 < i <= 10; [Elt[] -> " + "[i]] -> Dom[2] : 10 < i }"); + + computeReachingDefinitionZone_check( + "{ Stmt_reduction_for[i] -> [3i] : 0 <= i <= 4 }", + "{ Stmt_reduction_for[i] -> Elt[] : 0 <= i <= 4 }", false, true, + "{ [Elt[] -> [i]] -> Stmt_reduction_for[0] : 0 < i <= 3; [Elt[] -> [i]] " + "-> Stmt_reduction_for[1] : 3 < i <= 6; [Elt[] -> [i]] -> " + "Stmt_reduction_for[2] : 6 < i <= 9; [Elt[] -> [i]] -> " + "Stmt_reduction_for[3] : 9 < i <= 12; [Elt[] -> [i]] -> " + "Stmt_reduction_for[4] : 12 < i }"); +} + +void checkComputeArrayLifetime(const char *ScheduleStr, const char *WritesStr, + const char *ReadsStr, int ReadEltInSameInst, + int InclWrite, int InclLastRead, int ExitReads, + const char *ExpectedStr) { + BOOL_FOR(ReadEltInSameInst) + BOOL_FOR(InclWrite) BOOL_FOR(InclLastRead) BOOL_FOR(ExitReads) { + isl_ctx *Ctx = isl_ctx_alloc(); + + { + auto Schedule = give(isl_union_map_read_from_str(Ctx, ScheduleStr)); + auto Writes = give(isl_union_map_read_from_str(Ctx, WritesStr)); + auto Reads = give(isl_union_map_read_from_str(Ctx, ReadsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx, ExpectedStr)); + + auto Result = + computeArrayLifetime(Schedule, Writes, Reads, ReadEltInSameInst, + InclWrite, InclLastRead, ExitReads); + auto Success = 
isl_union_map_is_equal(Result.keep(), Expected.keep()); + + EXPECT_EQ(isl_bool_true, Success); + } + + isl_ctx_free(Ctx); + } +} + +TEST(DeLICM, ArrayPerWriteLifetimeZone) { + checkComputeArrayLifetime("{ }", "{ }", "{ }", indef, indef, indef, indef, + "{ }"); + checkComputeArrayLifetime("{ Read[] -> [10] }", "{ Read[] -> A[] }", "{ }", + indef, indef, indef, false, "{ }"); + + checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", "{ }", + indef, indef, indef, false, "{ }"); + checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", "{ }", + indef, false, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 < i }"); + checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", "{ }", + indef, true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }"); + + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + false, false, false, + "{ [A[] -> Def[]] -> [i] : 10 < i < 20 }"); + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + true, false, false, + "{ [A[] -> Def[]] -> [i] : 10 <= i < 20 }"); + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + false, true, false, + "{ [A[] -> Def[]] -> [i] : 10 < i <= 20 }"); + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + true, true, false, + "{ [A[] -> Def[]] -> [i] : 10 <= i <= 20 }"); + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + false, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 < i }"); + checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", indef, + true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }"); + + checkComputeArrayLifetime( + "{ Def[] -> [10]; Read1[] -> [20]; Read2[] -> [30] }", "{ Def[] -> A[] }", + "{ Read1[] 
-> A[]; Read2[] -> A[] }", indef, true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }"); + checkComputeArrayLifetime( + "{ Def[] -> [10]; Read1[] -> [20]; Read2[] -> [30] }", "{ Def[] -> A[] }", + "{ Read1[] -> A[]; Read2[] -> A[] }", indef, true, true, false, + "{ [A[] -> Def[]] -> [i] : 10 <= i <= 30 }"); + + checkComputeArrayLifetime("{ Def1[] -> [0]; Read[] -> [10]; Def2[] -> [20] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", + "{ Read[] -> A[] }", indef, true, true, false, + "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10 }"); + checkComputeArrayLifetime("{ Def1[] -> [0]; Read[] -> [10]; Def2[] -> [20] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", + "{ Read[] -> A[] }", indef, true, true, true, + "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10; [A[] -> " + "Def2[]] -> [i] : 20 <= i }"); + + checkComputeArrayLifetime("{ Def1[] -> [0]; Def2[] -> [10]; Read[] -> [10] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", + "{ Read[] -> A[] }", false, true, true, true, + "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10; [A[] -> " + "Def2[]] -> [i] : 10 <= i }"); + checkComputeArrayLifetime("{ Def1[] -> [0]; Def2[] -> [10]; Read[] -> [10] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", + "{ Read[] -> A[] }", true, true, true, true, + "{ [A[] -> Def2[]] -> [i] : 10 <= i }"); +} + +void computeReachingOverwriteZone_check(const char *ScheduleStr, + const char *DefsStr, + int IncludePrevWrite, + int IncludeOverwrite, + const char *ExpectedStr) { + BOOL_FOR(IncludePrevWrite) + BOOL_FOR(IncludeOverwrite) { + isl_ctx *Ctx = isl_ctx_alloc(); + { + auto Schedule = give(isl_union_map_read_from_str(Ctx, ScheduleStr)); + auto Defs = give(isl_union_map_read_from_str(Ctx, DefsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx, ExpectedStr)); + + auto Result = computeReachingOverwrite(Schedule, Defs, IncludePrevWrite, + IncludeOverwrite); + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + + EXPECT_EQ(isl_bool_true, Success); + } + + isl_ctx_free(Ctx); + } +} + +TEST(DeLICM, 
ReachingOverwriteZone) { + computeReachingOverwriteZone_check("{ Write[] -> [0] }", + "{ Write[] -> Elt[] }", indef, false, + "{ [Elt[] -> [i]] -> Write[] : i < 0 }"); + computeReachingOverwriteZone_check("{ Write[] -> [0] }", + "{ Write[] -> Elt[] }", indef, true, + "{ [Elt[] -> [i]] -> Write[] : i <= 0 }"); + + computeReachingOverwriteZone_check( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", false, + false, "{ [Elt[] -> [i]] -> Write[0] : i < 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 < i < 10 }"); + computeReachingOverwriteZone_check( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", false, + true, "{ [Elt[] -> [i]] -> Write[0] : i <= 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 < i <= 10 }"); + computeReachingOverwriteZone_check( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", true, + false, "{ [Elt[] -> [i]] -> Write[0] : i < 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 <= i < 10 }"); + computeReachingOverwriteZone_check( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", true, + true, "{ [Elt[] -> [i]] -> Write[0] : i <= 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 <= i <= 10 }"); +} + +void computeArrayUnusedZone_check(const char *ScheduleStr, + const char *WritesStr, const char *ReadsStr, + int ReadEltInSameInst, int IncludeLastRead, + int IncludeWrite, const char *ExpectedStr) { + BOOL_FOR(ReadEltInSameInst) BOOL_FOR(IncludeLastRead) BOOL_FOR(IncludeWrite) { + isl_ctx *Ctx = isl_ctx_alloc(); + + { + auto Schedule = give(isl_union_map_read_from_str(Ctx, ScheduleStr)); + auto Writes = give(isl_union_map_read_from_str(Ctx, WritesStr)); + auto Reads = give(isl_union_map_read_from_str(Ctx, ReadsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx, ExpectedStr)); + + auto Result = + computeArrayUnused(Schedule, Writes, Reads, ReadEltInSameInst, + IncludeLastRead, IncludeWrite); + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + + EXPECT_EQ(isl_bool_true, Success); + } 
+ + isl_ctx_free(Ctx); + } +} + +TEST(DeLICM, ArrayUnused) { + computeArrayUnusedZone_check("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[] -> Elt[] }", + indef, false, false, + "{ Elt[] -> [i] : 0 < i < 10 }"); + computeArrayUnusedZone_check("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[] -> Elt[] }", + indef, false, true, + "{ Elt[] -> [i] : 0 < i <= 10 }"); + computeArrayUnusedZone_check("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[] -> Elt[] }", + indef, true, false, + "{ Elt[] -> [i] : 0 <= i < 10 }"); + computeArrayUnusedZone_check("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[] -> Elt[] }", + indef, true, true, + "{ Elt[] -> [i] : 0 <= i <= 10 }"); + + computeArrayUnusedZone_check( + "{ Read[0] -> [-10]; Read[1] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[i] -> Elt[] }", indef, false, true, + "{ Elt[] -> [i] : 0 < i <= 10 }"); + computeArrayUnusedZone_check("{ Read[] -> [0]; }", "{ }", + "{ Read[] -> Elt[] }", indef, indef, indef, + "{ }"); + computeArrayUnusedZone_check("{ Write[] -> [0]; }", "{ Write[] -> Elt[] }", + "{ }", indef, indef, true, + "{ Elt[] -> [i] : i <= 0 }"); + + computeArrayUnusedZone_check("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", true, indef, false, + "{ Elt[] -> [i] : i < 0 }"); + computeArrayUnusedZone_check("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", true, indef, true, + "{ Elt[] -> [i] : i <= 0 }"); + computeArrayUnusedZone_check("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", false, true, true, + "{ Elt[] -> [0] }"); +} + +IslPtr emptyUMap(isl_ctx *Ctx) { + return give(isl_union_map_empty(isl_space_params_alloc(Ctx, 0))); +} +IslPtr emptyUSet(isl_ctx *Ctx) { + return give(isl_union_set_empty(isl_space_params_alloc(Ctx, 0))); +} + +void isConflicting_check(const char *ThisKnownStr, const char *ThisUndefStr, + const char *ThisWrittenStr, const char 
*ThatKnownStr, + const char *ThatUnknownStr, const char *ThatWrittenStr, + bool Expected) { + isl_ctx *Ctx = isl_ctx_alloc(); + LLVMContext C; + + { + auto ThisKnown = ThisKnownStr + ? give(isl_union_map_read_from_str(Ctx, ThisKnownStr)) + : emptyUMap(Ctx); + auto ThisUndef = ThisUndefStr + ? give(isl_union_set_read_from_str(Ctx, ThisUndefStr)) + : emptyUSet(Ctx); + auto ThisWritten = + ThisWrittenStr ? give(isl_union_map_read_from_str(Ctx, ThisWrittenStr)) + : emptyUMap(Ctx); + + auto ThatKnown = ThatKnownStr + ? give(isl_union_map_read_from_str(Ctx, ThatKnownStr)) + : emptyUMap(Ctx); + auto ThatUnknown = + ThatUnknownStr ? give(isl_union_set_read_from_str(Ctx, ThatUnknownStr)) + : emptyUSet(Ctx); + auto ThatWritten = + ThatWrittenStr ? give(isl_union_map_read_from_str(Ctx, ThatWrittenStr)) + : emptyUMap(Ctx); + + auto UndefVal = UndefValue::get(IntegerType::get(C, 8)); + auto UndefId = give(isl_id_alloc(Ctx, "Undef", UndefVal)); + auto UndefSpace = give(isl_space_set_alloc(Ctx, 0, 0)); + UndefSpace = give( + isl_space_set_tuple_id(UndefSpace.take(), isl_dim_set, UndefId.take())); + auto UndefSet = give(isl_set_universe(UndefSpace.take())); + auto UndefUSet = give(isl_union_set_from_set(UndefSet.take())); + + auto UnknownSpace = give(isl_space_set_alloc(Ctx, 0, 0)); + auto UnknownSet = give(isl_set_universe(UnknownSpace.take())); + auto UnknownUSet = give(isl_union_set_from_set(UnknownSet.take())); + + auto ThisLifetime = give(isl_union_map_union( + ThisKnown.take(), isl_union_map_from_domain_and_range( + ThisUndef.take(), UndefUSet.take()))); + auto ThatLifetime = give(isl_union_map_union( + ThatKnown.take(), isl_union_map_from_domain_and_range( + ThatUnknown.take(), UnknownUSet.take()))); + + auto Result = polly::isConflicting(ThisLifetime, true, ThisWritten, + ThatLifetime, false, ThatWritten); + + EXPECT_EQ(Expected, Result); + } + + isl_ctx_free(Ctx); +} + +IslPtr unionSpace(NonowningIslPtr USet) { + auto Result = 
give(isl_union_set_empty(isl_union_set_get_space(USet.keep()))); + foreachElt(USet, [=, &Result](IslPtr Set) { + auto Space = give(isl_set_get_space(Set.keep())); + auto Universe = give(isl_set_universe(Space.take())); + Result = give(isl_union_set_add_set(Result.take(), Universe.take())); + }); + return Result; +} + +bool checkIsConflicting( + const char *ExistingKnownStr, const char *ExistingUnknownStr /*necessary?*/, + const char *ExistingUndefStr, + const char *ExistingWrittenStr, /* const char *ExistingWrittenUnknownStr,*/ + const char *ProposedKnownStr, const char *ProposedUnknownStr /*necessary?*/, + const char *ProposedUndefStr, + const char + *ProposedWrittenStr /*, const char *ProposedWrittenUnknownStr*/) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + LLVMContext C; + + auto ExistingKnown = + give(isl_union_map_read_from_str(Ctx.get(), ExistingKnownStr)); + auto ExistingUnknown = + ExistingUnknownStr + ? give(isl_union_set_read_from_str(Ctx.get(), ExistingUnknownStr)) + : nullptr; + auto ExistingUndef = + ExistingUndefStr + ? give(isl_union_set_read_from_str(Ctx.get(), ExistingUndefStr)) + : nullptr; + auto ExistingWritten = + give(isl_union_map_read_from_str(Ctx.get(), ExistingWrittenStr)); + // auto ExistingWrittenUnknown = give(isl_union_set_read_from_str(Ctx.get(), + // ExistingWrittenUnknownStr)); + + auto ProposedKnown = + give(isl_union_map_read_from_str(Ctx.get(), ProposedKnownStr)); + auto ProposedUnknown = + ProposedUnknownStr + ? give(isl_union_set_read_from_str(Ctx.get(), ProposedUnknownStr)) + : nullptr; + auto ProposedUndef = + ProposedUndefStr + ? 
give(isl_union_set_read_from_str(Ctx.get(), ProposedUndefStr)) + : nullptr; + auto ProposedWritten = + give(isl_union_map_read_from_str(Ctx.get(), ProposedWrittenStr)); + // auto ProposedWrittenUnknown = give(isl_union_set_read_from_str(Ctx.get(), + // ProposedWrittenUnknownStr)); + + auto UndefVal = UndefValue::get(IntegerType::get(C, 8)); + auto UndefId = give(isl_id_alloc(Ctx.get(), "Undef", UndefVal)); + auto UndefSpace = give(isl_space_set_tuple_id( + isl_space_set_alloc(Ctx.get(), 0, 0), isl_dim_set, UndefId.take())); + auto UndefSet = give(isl_set_universe(UndefSpace.take())); + auto UndefUSet = give(isl_union_set_from_set(UndefSet.take())); + + auto ExistingDefined = give(isl_union_map_domain(ExistingKnown.copy())); + auto ExistingLifetime = ExistingKnown; + if (ExistingUnknown) { + ExistingDefined = give( + isl_union_set_union(ExistingDefined.take(), ExistingUnknown.copy())); + ExistingLifetime = give( + isl_union_map_union(ExistingLifetime.take(), + isl_union_map_from_domain(ExistingUnknown.copy()))); + } + if (ExistingUndef) { + ExistingDefined = + give(isl_union_set_union(ExistingDefined.take(), ExistingUndef.copy())); + ExistingLifetime = give(isl_union_map_union( + ExistingLifetime.take(), isl_union_map_from_domain_and_range( + ExistingUndef.copy(), UndefUSet.copy()))); + } + + auto ProposedDefined = give(isl_union_map_domain(ProposedKnown.copy())); + auto ProposedLifetime = ProposedKnown; + if (ProposedUnknown) { + ProposedDefined = give( + isl_union_set_union(ProposedDefined.take(), ProposedUnknown.copy())); + ProposedLifetime = give( + isl_union_map_union(ProposedLifetime.take(), + isl_union_map_from_domain(ProposedUnknown.copy()))); + } + if (ProposedUndef) { + ProposedDefined = + give(isl_union_set_union(ProposedDefined.take(), ProposedUndef.copy())); + ProposedLifetime = give(isl_union_map_union( + ProposedLifetime.take(), isl_union_map_from_domain_and_range( + ProposedUndef.copy(), UndefUSet.copy()))); + } + + auto ExistingUniverse = 
unionSpace(ExistingDefined); + ExistingUniverse = give(isl_union_set_union( + ExistingUniverse.take(), + unionSpace(give(isl_union_map_domain(ExistingWritten.copy()))).take())); + // ExistingUniverse = give(isl_union_set_union(ExistingUniverse.take(), + // unionSpace( ExistingWrittenUnknown ).take() )); + auto ProposedUniverse = unionSpace(ProposedDefined); + ProposedUniverse = give(isl_union_set_union( + ProposedUniverse.take(), + unionSpace(give(isl_union_map_domain(ProposedWritten.copy()))).take())); + // ProposedUniverse = give(isl_union_set_union(ProposedUniverse.take(), + // unionSpace( ProposedWrittenUnknown ).take() )); + auto Universe = give( + isl_union_set_union(ExistingUniverse.take(), ProposedUniverse.take())); + + // if (!ExistingUnknownStr) + // ExistingLifetime = give(isl_union_map_union( ExistingLifetime.take(), + // isl_union_map_from_domain(isl_union_set_subtract(Universe.copy(), + // ExistingDefined.copy() )) )); + if (!ExistingUndefStr) + ExistingLifetime = give(isl_union_map_union( + ExistingLifetime.take(), + isl_union_map_from_domain_and_range( + isl_union_set_subtract(Universe.copy(), ExistingDefined.copy()), + UndefUSet.copy()))); + + if (!ProposedUnknownStr) + ExistingLifetime = give( + isl_union_map_union(ExistingLifetime.take(), + isl_union_map_from_domain(isl_union_set_subtract( + Universe.copy(), ExistingDefined.copy())))); + // if (!ProposedUndefStr) + // ExistingLifetime = give(isl_union_map_union( ExistingLifetime.take(), + // isl_union_map_from_domain_and_range(isl_union_set_subtract(Universe.copy(), + // ExistingDefined.copy() ), UndefUSet.copy()) )); + + return polly::isConflicting(ExistingLifetime, true, ExistingWritten, + ProposedLifetime, false, ProposedWritten); +} + +void isConflicting_checksymmetric(const char *ThisKnownStr, + const char *ThisWrittenStr, + const char *ThatKnownStr, + const char *ThatWrittenStr, bool Expected) { + isConflicting_check(ThisKnownStr, nullptr, ThisWrittenStr, ThatKnownStr, + nullptr, 
ThatWrittenStr, Expected); + isConflicting_check(ThatKnownStr, nullptr, ThatWrittenStr, ThisKnownStr, + nullptr, ThisWrittenStr, Expected); +} + +TEST(DeLICM, IsConflicting) { + isConflicting_check(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + false); + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> Val[] }", nullptr, nullptr, + nullptr, false); + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> [] }", nullptr, nullptr, + nullptr, false); + isConflicting_checksymmetric("{ Dom[0] -> Val[] }", nullptr, + "{ Dom[0] -> Val[] }", nullptr, false); + // computeArrayUnusedZone_checksymmetric("{ Dom[i] -> Val[] : 0 < i < 10 }", + // nullptr, nullptr, "{ Dom[0] -> [] }", false ); + isConflicting_check("{ Dom[i] -> Val[] : 0 < i }", "{ Dom[i] : i <= 0 }", + nullptr, nullptr, nullptr, "{ Dom[-1] -> [] }", false); + isConflicting_check("{ Dom[i] -> Val[] : i <= 10 }", "{ Dom[i] : 10 < i }", + nullptr, nullptr, nullptr, "{ Dom[10] -> [] }", false); + isConflicting_check(nullptr, "{ Dom[0] }", nullptr, "{ Dom[0] -> Val[] }", + nullptr, nullptr, false); + isConflicting_check(nullptr, "{ Dom[i] }", nullptr, nullptr, nullptr, + "{ Dom[0] -> Val[] }", false); + // isConflicting_check(nullptr, "{ Dom[i] }", "{ Dom[0] -> [] }", nullptr, + // nullptr, "{ Dom[0] -> Val[] }", false); + EXPECT_TRUE(checkIsConflicting("{}", "{ Dom[i] }", nullptr, "{ }", "{}", + nullptr, "{}", "{ Dom[0] -> Val[] }")); + + isConflicting_check(nullptr, "{ Dom[i] }", "{ Dom[0] -> Val[] }", + "{ Dom[i] -> Val[] }", nullptr, nullptr, false); + + isConflicting_checksymmetric("{ Dom[0] -> ValA[] }", nullptr, + "{ Dom[0] -> ValB[] }", nullptr, true); + isConflicting_check("{ Dom[0] -> Val[] }", nullptr, nullptr, nullptr, + "{ Dom[0] }", nullptr, true); + isConflicting_check(nullptr, nullptr, nullptr, "{ Dom[] -> Val[] }", nullptr, + nullptr, true); + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{}", + "{ Dom[] -> Val[] }", "{}", nullptr, "{}")); + + // computeArrayUnusedZone_checksymmetric("{ 
Dom[i] -> Val[] : 0 < i < 10 }", + // nullptr, nullptr, "{ Dom[0] -> [] }", true ); + isConflicting_check("{ Dom[i] -> Val[] : 0 < i <= 10 }", nullptr, nullptr, + nullptr, nullptr, "{ Dom[1] -> [] }", true); + isConflicting_check("{ Dom[i] -> Val[] : 0 < i <= 10 }", nullptr, nullptr, + nullptr, nullptr, "{ Dom[9] -> [] }", true); + + isConflicting_checksymmetric("{ Dom[i] -> ValA[] }", nullptr, + "{ Dom[i] -> ValA[] }", "{ Dom[0] -> ValB[] }", + true); + isConflicting_checksymmetric("{ Dom[i] -> Val[] }", nullptr, + "{ Dom[i] -> Val[] }", "{ Dom[0] -> [] }", true); + isConflicting_check("{ Dom[i] -> [] }", nullptr, nullptr, nullptr, nullptr, + "{ Dom[0] -> [] }", true); + isConflicting_check(nullptr, "{ Dom[i] }", "{ Dom[0] -> [] }", + "{ Dom[i] -> [] }", nullptr, nullptr, true); + + isConflicting_check(nullptr, nullptr, nullptr, nullptr, nullptr, + "{ Dom[0] -> Val[] }", true); + isConflicting_check(nullptr, nullptr, nullptr, nullptr, nullptr, + "{ Dom[0] -> [] }", true); + + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> Val[] }", nullptr, + "{ Dom[i] }", nullptr, true); + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> [] }", nullptr, + "{ Dom[i] }", nullptr, true); + + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> Val[] }", + "{ Dom[i] -> [] }", nullptr, nullptr, true); + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> [] }", + "{ Dom[i] -> Val[] }", nullptr, nullptr, true); + isConflicting_check(nullptr, nullptr, "{ Dom[0] -> [] }", "{ Dom[i] -> [] }", + nullptr, nullptr, true); + + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> ValA[] }", + "{}", "{}", nullptr, "{ Dom[0] -> ValB[] }")); + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> [] }", "{}", + "{}", nullptr, "{ Dom[0] -> Val[] }")); + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> Val[] }", + "{}", "{}", nullptr, "{ Dom[0] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> [] }", "{}", + "{}", nullptr, "{ 
Dom[0] -> [] }")); + + EXPECT_FALSE(checkIsConflicting("{}", "{}", nullptr, + "{ Dom[0] -> ValA[]; Dom[0] -> ValB[] }", + "{}", "{}", nullptr, "{ }")); +} + +} // anonymous namespace