Changeset View
Changeset View
Standalone View
Standalone View
lib/Transforms/Utils/LoopUtils.cpp
Show All 17 Lines | |||||
#include "llvm/Analysis/GlobalsModRef.h" | #include "llvm/Analysis/GlobalsModRef.h" | ||||
#include "llvm/Analysis/LoopInfo.h" | #include "llvm/Analysis/LoopInfo.h" | ||||
#include "llvm/Analysis/LoopPass.h" | #include "llvm/Analysis/LoopPass.h" | ||||
#include "llvm/Analysis/ScalarEvolution.h" | #include "llvm/Analysis/ScalarEvolution.h" | ||||
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpander.h" | #include "llvm/Analysis/ScalarEvolutionExpander.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h" | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||
#include "llvm/Analysis/TargetTransformInfo.h" | #include "llvm/Analysis/TargetTransformInfo.h" | ||||
#include "llvm/Analysis/ValueTracking.h" | |||||
#include "llvm/IR/Dominators.h" | #include "llvm/IR/Dominators.h" | ||||
#include "llvm/IR/Instructions.h" | #include "llvm/IR/Instructions.h" | ||||
#include "llvm/IR/Module.h" | #include "llvm/IR/Module.h" | ||||
#include "llvm/IR/PatternMatch.h" | #include "llvm/IR/PatternMatch.h" | ||||
#include "llvm/IR/ValueHandle.h" | #include "llvm/IR/ValueHandle.h" | ||||
#include "llvm/Pass.h" | #include "llvm/Pass.h" | ||||
#include "llvm/Support/Debug.h" | #include "llvm/Support/Debug.h" | ||||
#include "llvm/Support/KnownBits.h" | |||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||
using namespace llvm; | using namespace llvm; | ||||
using namespace llvm::PatternMatch; | using namespace llvm::PatternMatch; | ||||
#define DEBUG_TYPE "loop-utils" | #define DEBUG_TYPE "loop-utils" | ||||
bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, | bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, | ||||
Show All 31 Lines | bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { | ||||
case RK_IntegerMult: | case RK_IntegerMult: | ||||
case RK_FloatAdd: | case RK_FloatAdd: | ||||
case RK_FloatMult: | case RK_FloatMult: | ||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
Instruction * | /// Determines if Phi may have been type-promoted. If Phi has a single user | ||||
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, | /// that ANDs the Phi with a type mask, return the user. RT is updated to | ||||
/// account for the narrower bit width represented by the mask, and the AND | |||||
/// instruction is added to CI. | |||||
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, | |||||
SmallPtrSetImpl<Instruction *> &Visited, | SmallPtrSetImpl<Instruction *> &Visited, | ||||
SmallPtrSetImpl<Instruction *> &CI) { | SmallPtrSetImpl<Instruction *> &CI) { | ||||
if (!Phi->hasOneUse()) | if (!Phi->hasOneUse()) | ||||
return Phi; | return Phi; | ||||
const APInt *M = nullptr; | const APInt *M = nullptr; | ||||
Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); | Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); | ||||
// Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT | // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT | ||||
// with a new integer type of the corresponding bit width. | // with a new integer type of the corresponding bit width. | ||||
if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { | if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { | ||||
int32_t Bits = (*M + 1).exactLogBase2(); | int32_t Bits = (*M + 1).exactLogBase2(); | ||||
if (Bits > 0) { | if (Bits > 0) { | ||||
RT = IntegerType::get(Phi->getContext(), Bits); | RT = IntegerType::get(Phi->getContext(), Bits); | ||||
Visited.insert(Phi); | Visited.insert(Phi); | ||||
CI.insert(J); | CI.insert(J); | ||||
return J; | return J; | ||||
} | } | ||||
} | } | ||||
return Phi; | return Phi; | ||||
} | } | ||||
bool RecurrenceDescriptor::getSourceExtensionKind( | /// Compute the minimal bit width needed to represent a reduction whose exit | ||||
Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, | /// instruction is given by Exit. | ||||
SmallPtrSetImpl<Instruction *> &Visited, | static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, | ||||
SmallPtrSetImpl<Instruction *> &CI) { | DemandedBits *DB, | ||||
AssumptionCache *AC, | |||||
DominatorTree *DT) { | |||||
bool IsSigned = false; | |||||
const DataLayout &DL = Exit->getModule()->getDataLayout(); | |||||
uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); | |||||
if (DB) { | |||||
// Use the demanded bits analysis to determine the bits that are live out | |||||
// of the exit instruction, rounding up to the nearest power of two. If the | |||||
// use of demanded bits results in a smaller bit width, we know the value | |||||
// must be positive (i.e., IsSigned = false), because if this were not the | |||||
// case, the sign bit would have been demanded. | |||||
auto Mask = DB->getDemandedBits(Exit); | |||||
MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); | |||||
} | |||||
if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { | |||||
// If demanded bits wasn't able to limit the bit width, we can try to use | |||||
// value tracking instead. This can be the case, for example, if the value | |||||
// may be negative. | |||||
auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); | |||||
auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); | |||||
MaxBitWidth = NumTypeBits - NumSignBits; | |||||
KnownBits Bits = computeKnownBits(Exit, DL); | |||||
if (!Bits.isNonNegative()) { | |||||
// If the value is not known to be non-negative, we set IsSigned to true, | |||||
// meaning that we will use sext instructions instead of zext | |||||
// instructions to restore the original type. | |||||
IsSigned = true; | |||||
if (!Bits.isNegative()) | |||||
// If the value is not known to be negative, we don't known what the | |||||
// upper bit is, and therefore, we don't know what kind of extend we | |||||
// will need. In this case, just increase the bit width by one bit and | |||||
// use sext. | |||||
++MaxBitWidth; | |||||
} | |||||
} | |||||
if (!isPowerOf2_64(MaxBitWidth)) | |||||
MaxBitWidth = NextPowerOf2(MaxBitWidth); | |||||
return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), | |||||
IsSigned); | |||||
} | |||||
/// Collect cast instructions that can be ignored in the vectorizer's cost | |||||
/// model, given a reduction exit value and the minimal type in which the | |||||
/// reduction can be represented. | |||||
static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, | |||||
Type *RecurrenceType, | |||||
SmallPtrSetImpl<Instruction *> &Casts) { | |||||
SmallVector<Instruction *, 8> Worklist; | SmallVector<Instruction *, 8> Worklist; | ||||
bool FoundOneOperand = false; | SmallPtrSet<Instruction *, 8> Visited; | ||||
unsigned DstSize = RT->getPrimitiveSizeInBits(); | |||||
Worklist.push_back(Exit); | Worklist.push_back(Exit); | ||||
// Traverse the instructions in the reduction expression, beginning with the | |||||
// exit value. | |||||
while (!Worklist.empty()) { | while (!Worklist.empty()) { | ||||
Instruction *I = Worklist.pop_back_val(); | Instruction *Val = Worklist.pop_back_val(); | ||||
dcaballe: (I don't know DB/ValueTracking in depth so maybe I'm missing something.)
If I understand… | |||||
Demanded bits and value tracking will report different bit widths, in general. My thinking was that if demanded bits is able to limit the bit width at all, we use that width. Otherwise, we can try to do something with value tracking, which I think can be a more expensive analysis. So I think what we should do there is move the isPowerOf2_64 round-ups to the very end. The MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) condition will then check if demanded bits was able to tell us anything before we do the rounding. mssimpso: Demanded bits and value tracking will report different bit widths, in general. My thinking was… | |||||
for (Use &U : I->operands()) { | Visited.insert(Val); | ||||
if (auto *Cast = dyn_cast<CastInst>(Val)) | |||||
// Terminate the traversal if the operand is not an instruction, or we | if (Cast->getSrcTy() == RecurrenceType) { | ||||
// reach the starting value. | // If the source type of a cast instruction is equal to the recurrence | ||||
Instruction *J = dyn_cast<Instruction>(U.get()); | // type, it will be eliminated, and should be ignored in the vectorizer | ||||
if (!J || J == Start) | // cost model. | ||||
continue; | Casts.insert(Cast); | ||||
// Otherwise, investigate the operation if it is also in the expression. | |||||
if (Visited.count(J)) { | |||||
Worklist.push_back(J); | |||||
continue; | continue; | ||||
} | } | ||||
// If the operand is not in Visited, it is not a reduction operation, but | // Add all operands to the work list if they are loop-varying values that | ||||
// it does feed into one. Make sure it is either a single-use sign- or | // we haven't yet visited. | ||||
// zero-extend instruction. | for (Value *O : cast<User>(Val)->operands()) | ||||
CastInst *Cast = dyn_cast<CastInst>(J); | if (auto *I = dyn_cast<Instruction>(O)) | ||||
bool IsSExtInst = isa<SExtInst>(J); | if (TheLoop->contains(I) && !Visited.count(I)) | ||||
if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) | Worklist.push_back(I); | ||||
return false; | |||||
// Ensure the source type of the extend is no larger than the reduction | |||||
// type. It is not necessary for the types to be identical. | |||||
unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); | |||||
if (SrcSize > DstSize) | |||||
return false; | |||||
// Furthermore, ensure that all such extends are of the same kind. | |||||
if (FoundOneOperand) { | |||||
if (IsSigned != IsSExtInst) | |||||
return false; | |||||
} else { | |||||
FoundOneOperand = true; | |||||
IsSigned = IsSExtInst; | |||||
} | |||||
// Lastly, if the source type of the extend matches the reduction type, | |||||
// add the extend to CI so that we can avoid accounting for it in the | |||||
// cost model. | |||||
if (SrcSize == DstSize) | |||||
CI.insert(Cast); | |||||
} | } | ||||
} | } | ||||
return true; | |||||
} | |||||
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, | bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, | ||||
Loop *TheLoop, bool HasFunNoNaNAttr, | Loop *TheLoop, bool HasFunNoNaNAttr, | ||||
RecurrenceDescriptor &RedDes) { | RecurrenceDescriptor &RedDes, | ||||
DemandedBits *DB, | |||||
AssumptionCache *AC, | |||||
DominatorTree *DT) { | |||||
if (Phi->getNumIncomingValues() != 2) | if (Phi->getNumIncomingValues() != 2) | ||||
return false; | return false; | ||||
// Reduction variables are only found in the loop header block. | // Reduction variables are only found in the loop header block. | ||||
if (Phi->getParent() != TheLoop->getHeader()) | if (Phi->getParent() != TheLoop->getHeader()) | ||||
return false; | return false; | ||||
// Obtain the reduction start value from the value that comes from the loop | // Obtain the reduction start value from the value that comes from the loop | ||||
▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines | bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, | ||||
// pattern or more than just a select and cmp. | // pattern or more than just a select and cmp. | ||||
if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && | if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && | ||||
NumCmpSelectPatternInst != 2) | NumCmpSelectPatternInst != 2) | ||||
return false; | return false; | ||||
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) | if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) | ||||
return false; | return false; | ||||
// If we think Phi may have been type-promoted, we also need to ensure that | if (Start != Phi) { | ||||
// all source operands of the reduction are either SExtInsts or ZEstInsts. If | // If the starting value is not the same as the phi node, we speculatively | ||||
// so, we will be able to evaluate the reduction in the narrower bit width. | // looked through an 'and' instruction when evaluating a potential | ||||
if (Start != Phi) | // arithmetic reduction to determine if it may have been type-promoted. | ||||
may been -> may have been? dcaballe: may been -> may have been? | |||||
Nice catch! mssimpso: Nice catch! | |||||
if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, | // | ||||
IsSigned, VisitedInsts, CastInsts)) | // We now compute the minimal bit width that is required to represent the | ||||
// reduction. If this is the same width that was indicated by the 'and', we | |||||
// can represent the reduction in the smaller type. The 'and' instruction | |||||
// will be eliminated since it will essentially be a cast instruction that | |||||
// can be ignore in the cost model. If we compute a different type than we | |||||
// did when evaluating the 'and', the 'and' will not be eliminated, and we | |||||
// will end up with different kinds of operations in the recurrence | |||||
// expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is | |||||
// the case. | |||||
// | |||||
// The vectorizer relies on InstCombine to perform the actual | |||||
// type-shrinking. It does this by inserting instructions to truncate the | |||||
// exit value of the reduction to the width indicated by RecurrenceType and | |||||
// then extend this value back to the original width. If IsSigned is false, | |||||
// a 'zext' instruction will be generated; otherwise, a 'sext' will be | |||||
// used. | |||||
// | |||||
// TODO: We should not rely on InstCombine to rewrite the reduction in the | |||||
// smaller type. We should just generate a correctly typed expression | |||||
// to begin with. | |||||
Type *ComputedType; | |||||
std::tie(ComputedType, IsSigned) = | |||||
computeRecurrenceType(ExitInstruction, DB, AC, DT); | |||||
if (ComputedType != RecurrenceType) | |||||
return false; | return false; | ||||
// The recurrence expression will be represented in a narrower type. If | |||||
// there are any cast instructions that will be unnecessary, collect them | |||||
// in CastInsts. Note that the 'and' instruction was already included in | |||||
// this list. | |||||
// | |||||
// TODO: A better way to represent this may be to tag in some way all the | |||||
// instructions that are a part of the reduction. The vectorizer cost | |||||
// model could then apply the recurrence type to these instructions, | |||||
// without needing a white list of instructions to ignore. | |||||
collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); | |||||
} | |||||
// We found a reduction var if we have reached the original phi node and we | // We found a reduction var if we have reached the original phi node and we | ||||
// only have a single instruction with out-of-loop users. | // only have a single instruction with out-of-loop users. | ||||
// The ExitInstruction(Instruction which is allowed to have out-of-loop users) | // The ExitInstruction(Instruction which is allowed to have out-of-loop users) | ||||
// is saved as part of the RecurrenceDescriptor. | // is saved as part of the RecurrenceDescriptor. | ||||
// Save the description of this reduction variable. | // Save the description of this reduction variable. | ||||
RecurrenceDescriptor RD( | RecurrenceDescriptor RD( | ||||
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | if (Insts.count(dyn_cast<Instruction>(*Use))) | ||||
++NumUses; | ++NumUses; | ||||
if (NumUses > 1) | if (NumUses > 1) | ||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, | bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, | ||||
RecurrenceDescriptor &RedDes) { | RecurrenceDescriptor &RedDes, | ||||
DemandedBits *DB, AssumptionCache *AC, | |||||
DominatorTree *DT) { | |||||
BasicBlock *Header = TheLoop->getHeader(); | BasicBlock *Header = TheLoop->getHeader(); | ||||
Function &F = *Header->getParent(); | Function &F = *Header->getParent(); | ||||
bool HasFunNoNaNAttr = | bool HasFunNoNaNAttr = | ||||
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; | F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; | ||||
if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, | if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, | ||||
RedDes)) { | DB, AC, DT)) { | ||||
DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { | if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, | ||||
AC, DT)) { | |||||
DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); | DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); | ||||
return true; | return true; | ||||
} | } | ||||
// Not a reduction of known type. | // Not a reduction of known type. | ||||
return false; | return false; | ||||
} | } | ||||
bool RecurrenceDescriptor::isFirstOrderRecurrence( | bool RecurrenceDescriptor::isFirstOrderRecurrence( | ||||
▲ Show 20 Lines • Show All 1,143 Lines • Show Last 20 Lines |
(I don't know DB/ValueTracking in depth so maybe I'm missing something.)
If I understand correctly, we would try with value tracking in the following scenario:
But we wouldn't in the following one:
Is this the expected behavior? In other words, if DB returns a width narrower than the original one and it's later rounded up to the original width (first scenario), could value tracking return an even narrower width?
If so, shouldn't we always try with value tracking, even when MaxBitWidth != DL.getTypeSizeInBits?
Otherwise, shouldn't we skip value tracking for those cases (first scenario)? We could invoke isPowerOf2_64 only once at the end of the function.