Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -61,13 +61,16 @@ OMPBuilder.initialize(); } - /// Loop fusion + /// Data structure to hold information for the deleting + /// redundent OpenMP for loop call struct OMPLoopFusion { - // + /// Keeps map of __kmpc_static_init4 and its __kmpc_static_fini calls for each OpenMP for loop std::map call_init_fini_mapping; + /// Keeps map of __kmpc_static_init4 and all its compatilable __kmpc_static_init4 in a vector std::map> call_map; - std::map store_op0_op1; - std::map args_map; + /// store_op0_op01 keeps map of operand 1 and operand 0 + /// args_map keeps map of arguments of __kmpc_static_init4 for later cleaning + std::map store_op0_op1, args_map; CallInst *current_call_init_instruction = nullptr; }; @@ -124,202 +127,190 @@ Changed |= deduplicateRuntimeCalls(); Changed |= deleteParallelRegions(); - Changed |= runTheOMPLoopFusion(); + Changed |= deleteStaticScheduleCalls(); return Changed; } private: - /// Try to fuse OpenMP for loop with static scheduling + /// Combine "OpenMP for loop with static scheduling" /// check if all parameters are same and the loops are adjacent - /// the two for loops can be used - - bool runTheOMPLoopFusion() { - bool Changed = false; - + /// See https://openmp.llvm.org/Reference.pdf. See section 5.8.3.24 for parameters + /// The two for loops can share the same __kmpc_static_init4() and __kmpc_static_fini() + /// calls. Consider the following example + /// #pragma omp parallel + /// { + /// #pragma omp for + /// for (int i=0; i < 10; i++) + /// printf(i); // Loop-1 + /// #pragma omp for + /// for (int j=0; j < 10; j++) + /// printf(j); // Loop-2 + /// } + /// The __kmpc_static_fini() of Loop-1 and __kmpc_static_init4() of Loop-2 + /// can be removed and two loops can be run under a single pair of __kmpc_static_init4() and __kmpc_static_fini() calls + + /// The following function is the main pipeline for the whole process + bool deleteStaticScheduleCalls() { + bool Changed = true; for (Function *F : SCC) { OMPLoopFusion OLF; - runOverTheBlock(F, &OLF); + /// Run over the function and prepare the data and data structures + runOverTheBlock(*F, &OLF); + /// The following check the compatibility of between two + /// adjacent OpenMP for loops checkTheCompatibility(&OLF); + /// The following cleans the redundent instructions in the combined loop Changed = cleanInstructions(&OLF); - replace_UseValues(F, &OLF); - // printFunction(F); + /// The following replaces the use values in the combined loop + if (Changed) replace_UseValues(*F, &OLF); } return Changed; } - /// - // - bool find(CallInst *I, std::map> m) { - if (m.size() == 0) - return false; - for (auto itr = m.begin(); itr != m.end(); ++itr) { - if ((itr->second).size() == 0) - continue; - else { - for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); - ++itr1) { - if (I == *itr1) - return true; - } - } - } + /// The following function determines wheither a call instructions has + /// already been mapped. If yes then there is no need to test its compatibilility again + bool find(CallInst *I, std::map> m) { + for (auto itr :m) + for (auto itr1 : (itr.second)) + if (I == itr1) return true; return false; - } + } /// The following functions check the compatibility of the - // __kmpc_for_static_init_4 call instructions for the fusion + /// __kmpc_for_static_init_4 call instructions for + /// deletion. - bool checkTheCompatibility(OMPLoopFusion *OLF) { +void checkTheCompatibility(OMPLoopFusion *OLF) { bool compatible = true; - for (auto itr = OLF->call_init_fini_mapping.begin(); - itr != OLF->call_init_fini_mapping.end(); ++itr) { + /// for each __kmpc_static_init4 call instruction + for (auto itr : OLF->call_init_fini_mapping) { + /// check wheither it has been mapped already + if (find(itr.first, OLF->call_map)) continue; std::vector v; - if (find(itr->first, OLF->call_map)) - continue; - for (auto itr1 = itr; itr1 != OLF->call_init_fini_mapping.end(); ++itr1) { - if (itr == itr1) - continue; - else { - std::vector v1, v2; - for (Value *arg : (itr->first)->args()) { + std::vector v1; + /// if not then store the arguments of __kmpc_static_init4 in vector v1 + for (Value *arg : (itr.first)->args()) v1.push_back(arg); - } - for (Value *arg2 : (itr1->first)->args()) { + /// check the other __kmpc_static_init4 call instructions + for (auto itr1 : OLF->call_init_fini_mapping) { + if ((itr.first) == (itr1.first)) continue; + std::vector v2; + /// Store the arguments of __kmpc_static_init4 in vector v2 + for (Value *arg2 : (itr1.first)->args()) v2.push_back(arg2); - } - for (auto i = v1.begin(), j = v2.begin(); - i != v1.end() && j != v2.end(); ++i, ++j) { + /// check wheither each arguments from both call __kmpc_static_init4 are same + for (auto i = v1.begin(), j = v2.begin(); i != v1.end() && j != v2.end(); ++i, ++j) { + OLF->args_map.insert({*j,*i}); if (isa(*i) && isa(*j)) { if (*i != *j) { + // if any constant is not equal, the compatibility test fails compatible = false; - break; - } + break;} } else { // we have a pointer argument - if (OLF->store_op0_op1.find(*j)->second != - OLF->store_op0_op1.find(*i)->second) { + if (OLF->store_op0_op1.find(*j)->second != OLF->store_op0_op1.find(*i)->second) { + // if any pointer value is not equal, the compatibility test fails compatible = false; - break; - } + break;} } } - if (compatible) { - errs() << "Success" - << "\n"; - v.push_back(itr1->first); - } else - break; - } - } - - OLF->call_map.insert({itr->first, v}); - if (!compatible) { - compatible = true; + if (compatible) + v.push_back(itr1.first); + else + break; // the adjacent for omp loop is not compatible so there is no need to check others + // therefore we need to break out of the second for loop } + /// if a call instruction has some compatible call instructions then put in the call_map container + if (v.size() !=0) OLF->call_map.insert({itr.first, v}); + /// make the flag true again for the next instruction checking + if (!compatible) compatible = true; } - return false; } /// The function goes through each BB and check the call instructions /// __kmpc_for_static_init_4 and __kmpc_for_static_fini - /// store the operands of the store instructios - void runOverTheBlock(Function *F, OMPLoopFusion *OLF) { - for (Function::iterator FI = F->begin(); FI != F->end(); ++FI) { - for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ++BBI) { - if (CallInst *c = dyn_cast(BBI)) { - if (c->getCalledFunction()->getName() == "__kmpc_for_static_init_4") { + /// store the operands of the store instructions + void runOverTheBlock(Function &F, OMPLoopFusion *OLF) { + for (auto &BB: F) { + for (auto &BBI: BB) { + if (CallInst *c = dyn_cast(&BBI)) { + /// if its a __kmpc_static_init4 + if (c->getCalledFunction()->getName() == "__kmpc_for_static_init_4") OLF->current_call_init_instruction = c; - } else if (c->getCalledFunction()->getName() == - "__kmpc_for_static_fini") { - OLF->call_init_fini_mapping.insert( - {OLF->current_call_init_instruction, c}); - } - } else if (StoreInst *store = dyn_cast(BBI)) { - OLF->store_op0_op1.insert( - {store->getOperand(1), store->getOperand(0)}); + /// if its a __kmpc_static_fini + if (c->getCalledFunction()->getName() == "__kmpc_for_static_fini") + OLF->call_init_fini_mapping.insert({OLF->current_call_init_instruction, c}); } + /// if its a store instruction + if (StoreInst *store = dyn_cast(&BBI)) + /// store the operands + OLF->store_op0_op1.insert({store->getOperand(1), store->getOperand(0)}); } } - } + } + + + /// The following function delete the __kmpc_static_fini and __kmpc_static_init4 + /// call instructions which are redundent bool cleanInstructions(OMPLoopFusion *OLF) { + /// By default no change in the IR bool changed = false; - errs() << "[starting cleaning]" - << "\n"; - for (auto itr = OLF->call_map.begin(); itr != OLF->call_map.end(); itr++) { - Instruction *I = OLF->call_init_fini_mapping.find(itr->first)->second; - int count = (itr->second).size(); - if (count == 0) - continue; - else - I->eraseFromParent(); + /// Take each instructions that has some compatible instructions + for (auto itr : OLF->call_map) { + /// count the number of compatible instructions + int count = (itr.second).size(); + /// get the __kmpc_static_fini instruction specific to the __kmpc_static_init4 + /// and delete it + Instruction *I = OLF->call_init_fini_mapping.find(itr.first)->second; + I->eraseFromParent(); + /// We have changed the IR changed = true; - for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); - itr1++) { - Instruction *I1 = *itr1; + /// loop over the compatible instructions + for (auto itr1:itr.second) { + Instruction *I1 = itr1; I1->eraseFromParent(); - for (auto itr2 = OLF->call_init_fini_mapping.begin(); - itr2 != OLF->call_init_fini_mapping.end(); itr2++) { - if (*itr1 == itr2->first) { - Instruction *I2 = itr2->second; - if (count == 1 || count == 0) - break; - else { - I2->eraseFromParent(); - count--; - } + for (auto itr2: OLF->call_init_fini_mapping) { + /// Delete the respective __kmpc_static_fini call instruction + if (itr1 == itr2.first) { + Instruction *I2 = itr2.second; + /// Do not erase the last __kmpc_static_fini call instruction + if (count == 1 || count == 0) break; + I2->eraseFromParent(); + count--; + } } } } - } - errs() << "[Ending Cleaning]" - << "\n"; return changed; } - void replace_UseValues(Function *F, OMPLoopFusion *OLF) { - std::vector remove; - for (auto itr = OLF->call_map.begin(); itr != OLF->call_map.end(); itr++) { - std::vector vm; - for (Value *arg : (itr->first)->args()) { - vm.push_back(arg); - } - for (auto itr1 = (itr->second).begin(); itr1 != (itr->second).end(); - itr1++) { - std::vector vs; - for (Value *arg : (*itr1)->args()) { - vs.push_back(arg); - } - for (auto vmitr = vm.begin(), vsitr = vs.begin(); - vmitr != vm.end() && vsitr != vs.end(); vmitr++, vsitr++) { - if (isa(*vmitr)) - continue; - for (auto &BB : *F) { + /// The following function replaces the use values + /// of the deleted __kmpc_static_init4 call instructions with the parent + /// call function. The __kmpc_static_init4 is a parent call function + /// if it comes first + +void replace_UseValues(Function &F, OMPLoopFusion *OLF){ + std::vector remove; + for (auto &BB : F) for (auto &II : BB) { Instruction *It = &II; - if (isa(It)) - continue; - for (unsigned int k = 0; k < It->getNumOperands(); k++) { - if (It->getOperand(k) == *vsitr) { - It->setOperand(k, *vmitr); - if (isa(It) && k > 0) { - remove.push_back(It); - } + if (isa(It)) continue; + for (unsigned int k = 0; k < It->getNumOperands(); k++){ + auto temp = OLF->args_map.find(It->getOperand(k)); + if (temp != OLF->args_map.end()){ + It->setOperand(k, temp->second); + if (isa(It) && k > 0) remove.push_back(It); } } } - } - } - } - } - for (auto r = remove.begin(); r != remove.end(); r++) - (*r)->eraseFromParent(); - } - - void printFunction(Function *F) { - errs() << "\nThis is the new IR for the fused stuff\n\n"; - F->print(errs(), nullptr); - } + for (auto r: remove) + r->eraseFromParent(); +} +/// does function printing +// void printFunction(Function &F) { +// F.print(errs(), nullptr); +// } /// Try to delete parallel regions if possible bool deleteParallelRegions() { Index: openmp/runtime/src/kmp_sched.cpp =================================================================== --- openmp/runtime/src/kmp_sched.cpp +++ openmp/runtime/src/kmp_sched.cpp @@ -565,6 +565,7 @@ trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; } KMP_DEBUG_ASSERT(trip_count); + errs() << trip_count <<"[][]\n"; switch (schedule) { case kmp_sch_static: { if (trip_count <= nth) {