Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Standalone View
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Show First 20 Lines • Show All 2,260 Lines • ▼ Show 20 Lines | void CombinerHelper::applyCombineTruncOfExt( | ||||
Builder.setInstrAndDebugLoc(MI); | Builder.setInstrAndDebugLoc(MI); | ||||
if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) | if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) | ||||
Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); | Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); | ||||
else | else | ||||
Builder.buildTrunc(DstReg, SrcReg); | Builder.buildTrunc(DstReg, SrcReg); | ||||
MI.eraseFromParent(); | MI.eraseFromParent(); | ||||
} | } | ||||
bool CombinerHelper::matchCombineTruncOfShl( | static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) { | ||||
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { | const unsigned ShiftSize = ShiftTy.getScalarSizeInBits(); | ||||
const unsigned TruncSize = TruncTy.getScalarSizeInBits(); | |||||
// ShiftTy > 32 > TruncTy -> 32 | |||||
if (ShiftSize > 32 && TruncSize < 32) | |||||
return ShiftTy.changeElementSize(32); | |||||
// TODO: We could also reduce to 16 bits, but that's more target-dependent. | |||||
// Some targets like it, some don't, some only like it under certain | |||||
arsenm: This is what getPreferredShiftAmountTy is for | |||||
// conditions/processor versions, etc. | |||||
// A TL hook might be needed for this. | |||||
// Don't combine | |||||
return ShiftTy; | |||||
Start with lowercase letter arsenm: Start with lowercase letter | |||||
} | |||||
bool CombinerHelper::matchCombineTruncOfShift( | |||||
MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) { | |||||
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); | assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); | ||||
Register DstReg = MI.getOperand(0).getReg(); | Register DstReg = MI.getOperand(0).getReg(); | ||||
Register SrcReg = MI.getOperand(1).getReg(); | Register SrcReg = MI.getOperand(1).getReg(); | ||||
if (!MRI.hasOneNonDBGUse(SrcReg)) | |||||
This usage of getPreferredShiftAmountTy doesn't make sense, but you need something to pick an intermediate type here. Without adding anything, I guess you could bisect types until you found a legal shift? Creating the minimum required shift also "should" work out, but may create more legalizer and optimizer work. It would be nice if we could directly inspect the list of legal shift types, but the API doesn't have that. Does just reusing the truncating type work as well? arsenm: This usage of getPreferredShiftAmountTy doesn't make sense, but you need something to pick an… | |||||
Reusing the truncating type doesn't work as well, and trying to guess a legal one doesn't either for AArch64 because 16 bits shifts seem legal there, but are suboptimal compared to 64 (word size) shifts. My first version actually did a simplified bisecting to find a good type, but AArch64 suffered and that's why I didn't do it. I think a new TLI hook would be best so targets can just choose whatever they prefer. I'll post a version with that and we can discuss there. Pierre-vh: Reusing the truncating type doesn't work as well, and trying to guess a legal one doesn't… | |||||
Nevermind, I found that AArch64 "suffers" because the combine added in D109419 gets confused by the added trunc on the right shift operands. I'm looking into fixing it. Pierre-vh: Nevermind, I found that AArch64 "suffers" because the combine added in D109419 gets confused by… | |||||
return false; | |||||
LLT SrcTy = MRI.getType(SrcReg); | |||||
LLT DstTy = MRI.getType(DstReg); | LLT DstTy = MRI.getType(DstReg); | ||||
Register ShiftSrc; | |||||
Register ShiftAmt; | |||||
if (MRI.hasOneNonDBGUse(SrcReg) && | MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI); | ||||
mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && | const auto &TL = getTargetLowering(); | ||||
isLegalOrBeforeLegalizer( | |||||
{TargetOpcode::G_SHL, | LLT NewShiftTy; | ||||
{DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { | switch (SrcMI->getOpcode()) { | ||||
KnownBits Known = KB->getKnownBits(ShiftAmt); | default: | ||||
unsigned Size = DstTy.getSizeInBits(); | return false; | ||||
if (Known.countMaxActiveBits() <= Log2_32(Size)) { | case TargetOpcode::G_SHL: { | ||||
MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); | NewShiftTy = DstTy; | ||||
return true; | |||||
} | // Make sure new shift amount is legal. | ||||
KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg()); | |||||
if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits())) | |||||
return false; | |||||
break; | |||||
} | } | ||||
case TargetOpcode::G_LSHR: | |||||
case TargetOpcode::G_ASHR: { | |||||
// For right shifts, we conservatively do not do the transform if the TRUNC | |||||
// has any STORE users. The reason is that if we change the type of the | |||||
// shift, we may break the truncstore combine. | |||||
// | |||||
// TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)). | |||||
Do this first then? arsenm: Do this first then? | |||||
for (auto &User : MRI.use_instructions(DstReg)) | |||||
if (User.getOpcode() == TargetOpcode::G_STORE) | |||||
return false; | |||||
I don't understand special casing this, I'd rather just ignore it arsenm: I don't understand special casing this, I'd rather just ignore it | |||||
I spent a while trying to get the truncstore combine to work properly in such cases but it's really not easy. The matching logic is a bit complex. There's also a comment in that combine's matcher indicating it may be moved to a separate pass in the future to improve its matching logic, so I think it's better to just leave this special case in for now since:
The detailed reason is that the truncstore combine calculates the number of stores it should be merging together based on the type of the source it matches.
TL;DR: I would rather leave the special case in for now as removing isn't worth the effort IMO (and it doesn't seem to improve codegen quality significantly either) Pierre-vh: I spent a while trying to get the truncstore combine to work properly in such cases but it's… | |||||
NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy); | |||||
if (NewShiftTy == SrcTy) | |||||
return false; | |||||
// Make sure we won't lose information by truncating the high bits. | |||||
KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg()); | |||||
if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() - | |||||
DstTy.getScalarSizeInBits())) | |||||
return false; | return false; | ||||
break; | |||||
} | |||||
} | } | ||||
void CombinerHelper::applyCombineTruncOfShl( | if (!isLegalOrBeforeLegalizer( | ||||
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { | {SrcMI->getOpcode(), | ||||
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); | {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}})) | ||||
Register DstReg = MI.getOperand(0).getReg(); | return false; | ||||
Register SrcReg = MI.getOperand(1).getReg(); | |||||
LLT DstTy = MRI.getType(DstReg); | |||||
MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); | |||||
Register ShiftSrc = MatchInfo.first; | MatchInfo = std::make_pair(SrcMI, NewShiftTy); | ||||
Register ShiftAmt = MatchInfo.second; | return true; | ||||
} | |||||
void CombinerHelper::applyCombineTruncOfShift( | |||||
MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) { | |||||
Builder.setInstrAndDebugLoc(MI); | Builder.setInstrAndDebugLoc(MI); | ||||
auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); | |||||
Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); | MachineInstr *ShiftMI = MatchInfo.first; | ||||
MI.eraseFromParent(); | LLT NewShiftTy = MatchInfo.second; | ||||
Register Dst = MI.getOperand(0).getReg(); | |||||
LLT DstTy = MRI.getType(Dst); | |||||
Register ShiftAmt = ShiftMI->getOperand(2).getReg(); | |||||
Register ShiftSrc = ShiftMI->getOperand(1).getReg(); | |||||
ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0); | |||||
Register NewShift = | |||||
Builder | |||||
.buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt}) | |||||
.getReg(0); | |||||
if (NewShiftTy == DstTy) | |||||
replaceRegWith(MRI, Dst, NewShift); | |||||
else | |||||
Builder.buildTrunc(Dst, NewShift); | |||||
eraseInst(MI); | |||||
} | } | ||||
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { | bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { | ||||
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { | return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { | ||||
return MO.isReg() && | return MO.isReg() && | ||||
getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); | getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); | ||||
}); | }); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 3,820 Lines • Show Last 20 Lines |
This is what getPreferredShiftAmountTy is for