Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 10,180 Lines • ▼ Show 20 Lines | if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) { | ||||
// if V2 is dead, then we swap V1 and V2 so we can | // if V2 is dead, then we swap V1 and V2 so we can | ||||
// use V2 as the destination instead. | // use V2 as the destination instead. | ||||
if (!V1->hasOneUse() && V2->hasOneUse()) { | if (!V1->hasOneUse() && V2->hasOneUse()) { | ||||
std::swap(V1, V2); | std::swap(V1, V2); | ||||
NeedSwap = !NeedSwap; | NeedSwap = !NeedSwap; | ||||
} | } | ||||
} | } | ||||
bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; | |||||
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; | |||||
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except | // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except | ||||
// that it is in input element units, not in bytes. Convert now. | // that it is in input element units, not in bytes. Convert now. | ||||
// For little endian, the order of the input vectors is reversed, and | // For little endian, the order of the input vectors is reversed, and | ||||
// the permutation mask is complemented with respect to 31. This is | // the permutation mask is complemented with respect to 31. This is | ||||
// necessary to produce proper semantics with the big-endian-based vperm | // necessary to produce proper semantics with the big-endian-based vperm | ||||
// instruction. | // instruction. | ||||
EVT EltVT = V1.getValueType().getVectorElementType(); | EVT EltVT = V1.getValueType().getVectorElementType(); | ||||
unsigned BytesPerElement = EltVT.getSizeInBits() / 8; | unsigned BytesPerElement = EltVT.getSizeInBits() / 8; | ||||
bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; | |||||
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; | |||||
/* | /* | ||||
Vectors will be appended like so: [ V1 | v2 ] | Vectors will be appended like so: [ V1 | v2 ] | ||||
XXSWAPD on V1: | XXSWAPD on V1: | ||||
stefanp: I don't think you need to bother with this extra ternary operator.
Everywhere you use… | |||||
[ A | B | C | D ] -> [ C | D | A | B ] | [ A | B | C | D ] -> [ C | D | A | B ] | ||||
0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15 | 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15 | ||||
i.e. index of A, B += 8, and index of C, D -= 8. | i.e. index of A, B += 8, and index of C, D -= 8. | ||||
XXSWAPD on V2: | XXSWAPD on V2: | ||||
[ E | F | G | H ] -> [ G | H | E | F ] | [ E | F | G | H ] -> [ G | H | E | F ] | ||||
16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31 | 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31 | ||||
i.e. index of E, F += 8, index of G, H -= 8 | i.e. index of E, F += 8, index of G, H -= 8 | ||||
Swap V1 and V2: | Swap V1 and V2: | ||||
[ V1 | V2 ] -> [ V2 | V1 ] | [ V1 | V2 ] -> [ V2 | V1 ] | ||||
0-15 16-31 0-15 16-31 | 0-15 16-31 0-15 16-31 | ||||
i.e. index of V1 += 16, index of V2 -= 16 | i.e. index of V1 += 16, index of V2 -= 16 | ||||
*/ | */ | ||||
SmallVector<SDValue, 16> ResultMask; | SmallVector<SDValue, 16> ResultMask; | ||||
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { | ||||
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; | unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; | ||||
if (Opcode == PPCISD::XXPERM) { | |||||
if (V1HasXXSWAPD) { | if (V1HasXXSWAPD) { | ||||
if (SrcElt < 8) | if (SrcElt < 8) | ||||
SrcElt += 8; | SrcElt += 8; | ||||
else if (SrcElt < 16) | else if (SrcElt < 16) | ||||
SrcElt -= 8; | SrcElt -= 8; | ||||
} | } | ||||
if (V2HasXXSWAPD) { | if (V2HasXXSWAPD) { | ||||
if (SrcElt > 23) | if (SrcElt > 23) | ||||
SrcElt -= 8; | SrcElt -= 8; | ||||
else if (SrcElt > 15) | else if (SrcElt > 15) | ||||
SrcElt += 8; | SrcElt += 8; | ||||
} | } | ||||
if (NeedSwap) { | if (NeedSwap) { | ||||
if (SrcElt < 16) | if (SrcElt < 16) | ||||
SrcElt += 16; | SrcElt += 16; | ||||
else | else | ||||
SrcElt -= 16; | SrcElt -= 16; | ||||
} | } | ||||
} | |||||
for (unsigned j = 0; j != BytesPerElement; ++j) | for (unsigned j = 0; j != BytesPerElement; ++j) | ||||
if (isLittleEndian) | if (isLittleEndian) | ||||
ResultMask.push_back( | ResultMask.push_back( | ||||
DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32)); | DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32)); | ||||
else | else | ||||
ResultMask.push_back( | ResultMask.push_back( | ||||
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); | DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); | ||||
} | } | ||||
if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) { | |||||
if (V1HasXXSWAPD) { | if (V1HasXXSWAPD) { | ||||
dl = SDLoc(V1->getOperand(0)); | dl = SDLoc(V1->getOperand(0)); | ||||
V1 = V1->getOperand(0)->getOperand(1); | V1 = V1->getOperand(0)->getOperand(1); | ||||
} | } | ||||
if (V2HasXXSWAPD) { | if (V2HasXXSWAPD) { | ||||
dl = SDLoc(V2->getOperand(0)); | dl = SDLoc(V2->getOperand(0)); | ||||
V2 = V2->getOperand(0)->getOperand(1); | V2 = V2->getOperand(0)->getOperand(1); | ||||
} | } | ||||
if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) { | |||||
if (isPPC64 && ValType != MVT::v2f64) | if (isPPC64 && ValType != MVT::v2f64) | ||||
V1 = DAG.getBitcast(MVT::v2f64, V1); | V1 = DAG.getBitcast(MVT::v2f64, V1); | ||||
if (isPPC64 && V2.getValueType() != MVT::v2f64) | if (isPPC64 && V2.getValueType() != MVT::v2f64) | ||||
V2 = DAG.getBitcast(MVT::v2f64, V2); | V2 = DAG.getBitcast(MVT::v2f64, V2); | ||||
} | } | ||||
Not Done ReplyInline ActionsFor this section how about: if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) { /// Same code as above but you don't need to check that (V1HasXXSWAPD || V2HasXXSWAPD) } You don't need to go into the if statement at all if there isn't at least one XXSWAPD. stefanp: For this section how about:
```
if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD))… | |||||
ShufflesHandledWithVPERM++; | ShufflesHandledWithVPERM++; | ||||
SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); | SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); | ||||
LLVM_DEBUG({ | LLVM_DEBUG({ | ||||
Not Done ReplyInline ActionsIs it possible to have a swap on both sides? stefanp: Is it possible to have a swap on both sides? | |||||
I have not seen such a case. maryammo: I have not seen such a case.
btw, this patch is to address the vperm issue, so we can look at… | |||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); | ||||
if (Opcode == PPCISD::XXPERM) { | if (Opcode == PPCISD::XXPERM) { | ||||
dbgs() << "Emitting a XXPERM for the following shuffle:\n"; | dbgs() << "Emitting a XXPERM for the following shuffle:\n"; | ||||
} else { | } else { | ||||
dbgs() << "Emitting a VPERM for the following shuffle:\n"; | dbgs() << "Emitting a VPERM for the following shuffle:\n"; | ||||
} | } | ||||
SVOp->dump(); | SVOp->dump(); | ||||
dbgs() << "With the following permute control vector:\n"; | dbgs() << "With the following permute control vector:\n"; | ||||
▲ Show 20 Lines • Show All 8,136 Lines • Show Last 20 Lines |
I don't think you need to bother with this extra ternary operator.
Everywhere you use V1HasXXSWAPD or V2HasXXSWAPD seems to already be guarded by Opcode == PPCISD::XXPERM so it doesn't matter what value those two bool values have in cases when you are not using the XXPERM operand.