diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5951,8 +5951,9 @@ IntervalMap EndPoint; // Saves the list of instruction indices that are used in the loop. SmallPtrSet Ends; - // Saves the list of values that are used in the loop but are - // defined outside the loop, such as arguments and constants. + // Saves the list of values that are used in the loop but are defined outside + // the loop (not including non-instruction values such as arguments and + // constants). SmallPtrSet LoopInvariants; for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { @@ -5964,6 +5965,9 @@ auto *Instr = dyn_cast(U); // Ignore non-instruction values such as arguments, constants, etc. + // FIXME: Might need some motivation why these values are ignored. If + // for example an argument is used inside the loop it will increase the + // register pressure (so shouldn't we add it to LoopInvariants). if (!Instr) continue; @@ -6019,14 +6023,19 @@ // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { - // Count the number of live intervals. + // Count the number of registers used, per register class, given all open + // intervals. + // Note that elements in this SmallMapVector will be default constructed + // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if + // there is no previous entry for ClassID. SmallMapVector RegUsage; if (VFs[j].isScalar()) { for (auto *Inst : OpenIntervals) { - unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); - // If RegUsage[ClassID] doesn't exist, it will be default - // constructed as 0 before the addition + unsigned ClassID = + TTI.getRegisterClassForType(false, Inst->getType()); + // FIXME: The target might use more than one register for the type + // even in the scalar case. RegUsage[ClassID] += 1; } } else { @@ -6036,14 +6045,14 @@ if (VecValuesToIgnore.count(Inst)) continue; if (isScalarAfterVectorization(Inst, VFs[j])) { - unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); - // If RegUsage[ClassID] doesn't exist, it will be default - // constructed as 0 before the addition + unsigned ClassID = + TTI.getRegisterClassForType(false, Inst->getType()); + // FIXME: The target might use more than one register for the type + // even in the scalar case. RegUsage[ClassID] += 1; } else { - unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType()); - // If RegUsage[ClassID] doesn't exist, it will be default - // constructed as 0 before the addition + unsigned ClassID = + TTI.getRegisterClassForType(true, Inst->getType()); RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]); } } @@ -6063,17 +6072,19 @@ } for (unsigned i = 0, e = VFs.size(); i < e; ++i) { + // Note that elements in this SmallMapVector will be default constructed + // as 0. So we can use "Invariant[ClassID] += n" in the code below even if + // there is no previous entry for ClassID. SmallMapVector Invariant; for (auto *Inst : LoopInvariants) { + // FIXME: The target might use more than one register for the type + // even in the scalar case. unsigned Usage = VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]); unsigned ClassID = TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType()); - if (Invariant.find(ClassID) == Invariant.end()) - Invariant[ClassID] = Usage; - else - Invariant[ClassID] += Usage; + Invariant[ClassID] += Usage; } LLVM_DEBUG({