diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -61,6 +61,16 @@ OMPBuilder.initialize(); } + /// Loop fusion + struct OMPLoopFusion { + // + std::map call_init_fini_mapping; + std::map> call_map; + std::map store_op0_op1; + std::map args_map; + CallInst *current_call_init_instruction = nullptr; + }; + /// Generic information that describes a runtime function struct RuntimeFunctionInfo { /// The kind, as described by the RuntimeFunction enum. @@ -114,11 +124,203 @@ Changed |= deduplicateRuntimeCalls(); Changed |= deleteParallelRegions(); + Changed |= runTheOMPLoopFusion(); return Changed; } private: + /// Try to fuse OpenMP for loop with static scheduling + /// check if all parameters are same and the loops are adjacent + /// the two for loops can be used + + bool runTheOMPLoopFusion() { + bool Changed = false; + + for (Function *F : SCC) { + OMPLoopFusion OLF; + runOverTheBlock(F, &OLF); + checkTheCompatibility(&OLF); + Changed = cleanInstructions(&OLF); + replace_UseValues(F, &OLF); + // printFunction(F); + } + return Changed; + } + + /// + // + bool find(CallInst *I, std::map> m) { + if (m.size() == 0) + return false; + for (auto itr = m.begin(); itr != m.end(); ++itr) { + if ((itr->second).size() == 0) + continue; + else { + for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); + ++itr1) { + if (I == *itr1) + return true; + } + } + } + return false; + } + + /// The following functions check the compatibility of the + // __kmpc_for_static_init_4 call instructions for the fusion + + bool checkTheCompatibility(OMPLoopFusion *OLF) { + bool compatible = true; + for (auto itr = OLF->call_init_fini_mapping.begin(); + itr != OLF->call_init_fini_mapping.end(); ++itr) { + std::vector v; + if (find(itr->first, OLF->call_map)) + continue; + for (auto itr1 = itr; itr1 != OLF->call_init_fini_mapping.end(); ++itr1) { + if (itr == itr1) + continue; + else { + std::vector v1, v2; + for (Value *arg : (itr->first)->args()) { + v1.push_back(arg); + } + for (Value *arg2 : (itr1->first)->args()) { + v2.push_back(arg2); + } + for (auto i = v1.begin(), j = v2.begin(); + i != v1.end() && j != v2.end(); ++i, ++j) { + if (isa(*i) && isa(*j)) { + if (*i != *j) { + compatible = false; + break; + } + } else { // we have a pointer argument + if (OLF->store_op0_op1.find(*j)->second != + OLF->store_op0_op1.find(*i)->second) { + compatible = false; + break; + } + } + } + if (compatible) { + errs() << "Success" + << "\n"; + v.push_back(itr1->first); + } else + break; + } + } + + OLF->call_map.insert({itr->first, v}); + if (!compatible) { + compatible = true; + } + } + return false; + } + + /// The function goes through each BB and check the call instructions + /// __kmpc_for_static_init_4 and __kmpc_for_static_fini + /// store the operands of the store instructios + void runOverTheBlock(Function *F, OMPLoopFusion *OLF) { + for (Function::iterator FI = F->begin(); FI != F->end(); ++FI) { + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ++BBI) { + if (CallInst *c = dyn_cast(BBI)) { + if (c->getCalledFunction()->getName() == "__kmpc_for_static_init_4") { + OLF->current_call_init_instruction = c; + } else if (c->getCalledFunction()->getName() == + "__kmpc_for_static_fini") { + OLF->call_init_fini_mapping.insert( + {OLF->current_call_init_instruction, c}); + } + } else if (StoreInst *store = dyn_cast(BBI)) { + OLF->store_op0_op1.insert( + {store->getOperand(1), store->getOperand(0)}); + } + } + } + } + + bool cleanInstructions(OMPLoopFusion *OLF) { + bool changed = false; + errs() << "[starting cleaning]" + << "\n"; + for (auto itr = OLF->call_map.begin(); itr != OLF->call_map.end(); itr++) { + Instruction *I = OLF->call_init_fini_mapping.find(itr->first)->second; + int count = (itr->second).size(); + if (count == 0) + continue; + else + I->eraseFromParent(); + changed = true; + for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); + itr1++) { + Instruction *I1 = *itr1; + I1->eraseFromParent(); + for (auto itr2 = OLF->call_init_fini_mapping.begin(); + itr2 != OLF->call_init_fini_mapping.end(); itr2++) { + if (*itr1 == itr2->first) { + Instruction *I2 = itr2->second; + if (count == 1 || count == 0) + break; + else { + I2->eraseFromParent(); + count--; + } + } + } + } + } + errs() << "[Ending Cleaning]" + << "\n"; + return changed; + } + + void replace_UseValues(Function *F, OMPLoopFusion *OLF) { + std::vector remove; + for (auto itr = OLF->call_map.begin(); itr != OLF->call_map.end(); itr++) { + std::vector vm; + for (Value *arg : (itr->first)->args()) { + vm.push_back(arg); + } + for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); + itr1++) { + std::vector vs; + for (Value *arg : (*itr1)->args()) { + vs.push_back(arg); + } + for (auto vmitr = vm.begin(), vsitr = vs.begin(); + vmitr != vm.end() && vsitr != vs.end(); vmitr++, vsitr++) { + if (isa(*vmitr)) + continue; + for (auto &BB : *F) { + for (auto &II : BB) { + Instruction *It = &II; + if (isa(It)) + continue; + for (unsigned int k = 0; k < It->getNumOperands(); k++) { + if (It->getOperand(k) == *vsitr) { + It->setOperand(k, *vmitr); + if (isa(It) && k > 0) { + remove.push_back(It); + } + } + } + } + } + } + } + } + for (auto r = remove.begin(); r != remove.end(); r++) + (*r)->eraseFromParent(); + } + + void printFunction(Function *F) { + errs() << "\nThis is the new IR for the fused stuff\n\n"; + F->print(errs(), nullptr); + } + /// Try to delete parallel regions if possible bool deleteParallelRegions() { const unsigned CallbackCalleeOperand = 2;