Diff 133207

lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

Show First 20 Lines • Show All 1,184 Lines • ▼ Show 20 Lines	case PPC::CLRLSLDIo: {
TmpInst.addOperand(MCOperand::createImm(N));		TmpInst.addOperand(MCOperand::createImm(N));
TmpInst.addOperand(MCOperand::createImm(B - N));		TmpInst.addOperand(MCOperand::createImm(B - N));
Inst = TmpInst;		Inst = TmpInst;
break;		break;
}		}
case PPC::RLWINMbm:		case PPC::RLWINMbm:
case PPC::RLWINMobm: {		case PPC::RLWINMobm: {
unsigned MB, ME;		unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();		unsigned BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))		if (!isRunOfOnes(BM, MB, ME))
		nemanjaiUnsubmitted Not Done Reply Inline Actions Do we not want to use `uint32_t` instead of `unsigned` to emphasize that this is a 32-bit value? nemanjai: Do we not want to use `uint32_t` instead of `unsigned` to emphasize that this is a 32-bit value?
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions I changed the type of `BM` instead of specifying type in `isRunOfOnes` since BM is used only in this function. I feel this is cleaner than explicitly saying uint32_t for isRunOfOnes. inouehrs: I changed the type of `BM` instead of specifying type in `isRunOfOnes` since BM is used only in…
break;		break;

MCInst TmpInst;		MCInst TmpInst;
TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);		TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));		TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));		TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));		TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(MCOperand::createImm(MB));		TmpInst.addOperand(MCOperand::createImm(MB));
TmpInst.addOperand(MCOperand::createImm(ME));		TmpInst.addOperand(MCOperand::createImm(ME));
Inst = TmpInst;		Inst = TmpInst;
break;		break;
}		}
case PPC::RLWIMIbm:		case PPC::RLWIMIbm:
case PPC::RLWIMIobm: {		case PPC::RLWIMIobm: {
unsigned MB, ME;		unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();		unsigned BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))		if (!isRunOfOnes(BM, MB, ME))
break;		break;

MCInst TmpInst;		MCInst TmpInst;
TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);		TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
TmpInst.addOperand(Inst.getOperand(0));		TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.		TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
TmpInst.addOperand(Inst.getOperand(1));		TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));		TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(MCOperand::createImm(MB));		TmpInst.addOperand(MCOperand::createImm(MB));
TmpInst.addOperand(MCOperand::createImm(ME));		TmpInst.addOperand(MCOperand::createImm(ME));
Inst = TmpInst;		Inst = TmpInst;
break;		break;
}		}
case PPC::RLWNMbm:		case PPC::RLWNMbm:
case PPC::RLWNMobm: {		case PPC::RLWNMobm: {
unsigned MB, ME;		unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();		unsigned BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))		if (!isRunOfOnes(BM, MB, ME))
break;		break;

MCInst TmpInst;		MCInst TmpInst;
TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);		TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
TmpInst.addOperand(Inst.getOperand(0));		TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));		TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));		TmpInst.addOperand(Inst.getOperand(2));
▲ Show 20 Lines • Show All 751 Lines • Show Last 20 Lines

lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h

Show First 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	std::unique_ptr<MCObjectWriter> createPPCMachObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,		bool Is64Bit,
uint32_t CPUType,		uint32_t CPUType,
uint32_t CPUSubtype);		uint32_t CPUSubtype);

/// Returns true iff Val consists of one contiguous run of 1s with any number of		/// Returns true iff Val consists of one contiguous run of 1s with any number of
/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so		/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,		/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
/// since all 1s are not contiguous.		/// since all 1s are not contiguous.
static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {		/// So far, isRunOfOnes supports only 32-bit and 64-bit unsigned integer types.
		template <typename T>
		static inline bool isRunOfOnes(T Val, unsigned &MB, unsigned &ME) {
		kbartonUnsubmitted Done Reply Inline Actions I don't understand why this has been transformed into a templated function. Is this necessary for this patch, or just some kind of cleanup? kbarton: I don't understand why this has been transformed into a templated function. Is this necessary…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions I need 64-bit version of `isRunOfOnes`. Since 32-bit and 64-bit versions are almost same, I use template to avoid writing almost same function twice. inouehrs: I need 64-bit version of `isRunOfOnes`. Since 32-bit and 64-bit versions are almost same, I use…
		nemanjaiUnsubmitted Not Done Reply Inline Actions I find it rather surprising that an adequate function doesn't exist in either `APInt` or somewhere in `MathExtras.h`. I wonder if adding it there might be a better place. Perhaps check with frequent contributors to those files? nemanjai: I find it rather surprising that an adequate function doesn't exist in either `APInt` or…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions `isRunOfOnes` is a wrapper around isShiftedMask defined in MathExtras.h and just returns the results of countLeadingZeros. I found similar functions in ARM, AArch64, NVPTX and AMDGPU, but they all use countTrailingZeros after isShiftedMask. It seems that what we need around isShiftedMask depends on the ISA. inouehrs: `isRunOfOnes` is a wrapper around isShiftedMask defined in MathExtras.h and just returns the…
		static_assert(std::numeric_limits<T>::is_integer &&
		!std::numeric_limits<T>::is_signed &&
		(std::numeric_limits<T>::digits == 32 \|\|
		std::numeric_limits<T>::digits == 64),
		"isRunOfOnes supports only 32-bit and 64-bit unsigned integer");

if (!Val)		if (!Val)
return false;		return false;

if (isShiftedMask_32(Val)) {		const bool Is64Bit = (std::numeric_limits<T>::digits == 64);

		bool IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
		nemanjaiUnsubmitted Done Reply Inline Actions Nit: spaces between binary operators and operands. nemanjai: Nit: spaces between binary operators and operands.
		kbartonUnsubmitted Done Reply Inline Actions I don't think I've ever seen a ternary used in an if statement like this. If we don't have precedent for this, could you please put spaces around the ? and : I find this difficult to read as is. kbarton: I don't think I've ever seen a ternary used in an if statement like this. If we don't have…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions I hope this version is easier to read. inouehrs: I hope this version is easier to read.
		isShiftedMask_32(Val);
		if (IsShiftedMask) {
// look for the first non-zero bit		// look for the first non-zero bit
MB = countLeadingZeros(Val);		MB = countLeadingZeros(Val);
// look for the first zero bit after the run of ones		// look for the first zero bit after the run of ones
ME = countLeadingZeros((Val - 1) ^ Val);		ME = countLeadingZeros((Val - 1) ^ Val);
return true;		return true;
} else {		} else {
Val = ~Val; // invert mask		Val = ~Val; // invert mask
if (isShiftedMask_32(Val)) {		IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
		isShiftedMask_32(Val);
		if (IsShiftedMask) {
		jtonyUnsubmitted Done Reply Inline Actions Missing space before `:`. Actually, we may want to put this short ternary expression in one line. jtony: Missing space before `:`. Actually, we may want to put this short ternary expression in one…
// effectively look for the first zero bit		// effectively look for the first zero bit
ME = countLeadingZeros(Val) - 1;		ME = countLeadingZeros(Val) - 1;
// effectively look for the first one bit after the run of zeros		// effectively look for the first one bit after the run of zeros
MB = countLeadingZeros((Val - 1) ^ Val) + 1;		MB = countLeadingZeros((Val - 1) ^ Val) + 1;
return true;		return true;
}		}
}		}
// no run present		// no run present
Show All 25 Lines

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Show First 20 Lines • Show All 193 Lines • ▼ Show 20 Lines	public:
// Select - Convert the specified operand from a target-independent to a		// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.		// target-specific node if it hasn't already been changed.
void Select(SDNode *N) override;		void Select(SDNode *N) override;

bool tryBitfieldInsert(SDNode *N);		bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);		bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);		bool tryIntCompareInGPR(SDNode *N);

		/// tryRotateThenMaskInsert - Returns true if N is replaced by
		/// RLDIMI/RLWIMI instruction.
		template <typename T>
		bool tryRotateThenMaskInsert(SDNode *N);

/// SelectCC - Select a comparison of the specified values with the		/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.		/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,		SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl);		const SDLoc &dl);

/// SelectAddrImm - Returns true if the address N can be represented by		/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].		/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,		bool SelectAddrImm(SDValue N, SDValue &Disp,
▲ Show 20 Lines • Show All 357 Lines • ▼ Show 20 Lines	bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
if (Mask && !(Mask & Indeterminant)) {		if (Mask && !(Mask & Indeterminant)) {
SH = Shift & 31;		SH = Shift & 31;
// make sure the mask is still a mask (wrap arounds may not be)		// make sure the mask is still a mask (wrap arounds may not be)
return isRunOfOnes(Mask, MB, ME);		return isRunOfOnes(Mask, MB, ME);
}		}
return false;		return false;
}		}

		/// Find a subtree generated for bitfield insert and convert it with
		/// a rotate left then mask insert instruction.
		bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
		// Expected nodes
		// %and1 = and i64 %val1, MASK
		// %and2 = and i64 %val2, ~MASK
		// %N = or i64 %and1, %and2
		if (N->getOpcode() != ISD::OR)
		return false;

		SDValue Op0 = N->getOperand(0);
		SDValue Op1 = N->getOperand(1);
		if (Op0->getOpcode() != ISD::AND \|\| Op1->getOpcode() != ISD::AND)
		return false;

		if (N->getValueType(0) == MVT::i32) {
		unsigned Mask1, Mask2;
		if (isInt32Immediate(Op0->getOperand(1), Mask1) &&
		isInt32Immediate(Op1->getOperand(1), Mask2) &&
		Mask1 == ~Mask2)
		return tryRotateThenMaskInsert<uint32_t>(N);
		}
		if (N->getValueType(0) == MVT::i64) {
		uint64_t Mask1, Mask2;
		if (isInt64Immediate(Op0->getOperand(1), Mask1) &&
		isInt64Immediate(Op1->getOperand(1), Mask2) &&
		Mask1 == ~Mask2)
		return tryRotateThenMaskInsert<uint64_t>(N);
		}
		return false;
		}

/// Turn an or of two masked values into the rotate left word immediate then		/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.		/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {		template <typename T>
		bool PPCDAGToDAGISel::tryRotateThenMaskInsert(SDNode *N) {
		static_assert(std::numeric_limits<T>::is_integer &&
		!std::numeric_limits<T>::is_signed &&
		(std::numeric_limits<T>::digits == 32 \|\|
		std::numeric_limits<T>::digits == 64),
		"tryRotateThenMaskInsert does not support this type");
		unsigned BitSize = std::numeric_limits<T>::digits;
		kbartonUnsubmitted Done Reply Inline Actions Is it possible to refactor this to separate the SimpleCase from the non-simple case without requiring too much code duplication? This will get rid of the boolean parameter, which makes it harder to follow. kbarton: Is it possible to refactor this to separate the SimpleCase from the non-simple case without…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions I added a new `tryBitfieldInsert` for checking simple bitfield insert cases. The original `tryBitfieldInsert` actually covers wider range than a simple bitfield insert; so I renamed it to `tryRotateThenMaskInsert` inouehrs: I added a new `tryBitfieldInsert` for checking simple bitfield insert cases. The original…
SDValue Op0 = N->getOperand(0);		SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);		SDValue Op1 = N->getOperand(1);
SDLoc dl(N);		SDLoc dl(N);

KnownBits LKnown, RKnown;		KnownBits LKnown, RKnown;
CurDAG->computeKnownBits(Op0, LKnown);		CurDAG->computeKnownBits(Op0, LKnown);
CurDAG->computeKnownBits(Op1, RKnown);		CurDAG->computeKnownBits(Op1, RKnown);

unsigned TargetMask = LKnown.Zero.getZExtValue();		T TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();		T InsertMask = RKnown.Zero.getZExtValue();

if ((TargetMask \| InsertMask) == 0xFFFFFFFF) {		// If all bits come from two source registers,
		// we can use rldimi/rlwimi instruction.
		if ((TargetMask \| InsertMask) == (T)-1) {
unsigned Op0Opc = Op0.getOpcode();		unsigned Op0Opc = Op0.getOpcode();
unsigned Op1Opc = Op1.getOpcode();		unsigned Op1Opc = Op1.getOpcode();
unsigned Value, SH = 0;		unsigned Value = 0, SH = 0;
TargetMask = ~TargetMask;		TargetMask = ~TargetMask;
InsertMask = ~InsertMask;		InsertMask = ~InsertMask;

// If the LHS has a foldable shift and the RHS does not, then swap it to the		// If the LHS has a foldable shift and the RHS does not, then swap it to the
// RHS so that we can fold the shift into the insert.		// RHS so that we can fold the shift into the insert.
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {		if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
if (Op0.getOperand(0).getOpcode() == ISD::SHL \|\|		if (Op0.getOperand(0).getOpcode() == ISD::SHL \|\|
Op0.getOperand(0).getOpcode() == ISD::SRL) {		Op0.getOperand(0).getOpcode() == ISD::SRL) {
Show All 13 Lines	if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
}		}
}		}

unsigned MB, ME;		unsigned MB, ME;
if (isRunOfOnes(InsertMask, MB, ME)) {		if (isRunOfOnes(InsertMask, MB, ME)) {
if ((Op1Opc == ISD::SHL \|\| Op1Opc == ISD::SRL) &&		if ((Op1Opc == ISD::SHL \|\| Op1Opc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(1), Value)) {		isInt32Immediate(Op1.getOperand(1), Value)) {
Op1 = Op1.getOperand(0);		Op1 = Op1.getOperand(0);
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;		SH = (Op1Opc == ISD::SHL) ? Value : BitSize - Value;
		Op1Opc = Op1.getOpcode();
}		}
if (Op1Opc == ISD::AND) {		if (Op1Opc == ISD::AND) {
// The AND mask might not be a constant, and we need to make sure that		// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not		// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.		// know to be zero in the mask are known to be one.
KnownBits MKnown;		KnownBits MKnown;
CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);		CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();		bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

unsigned SHOpc = Op1.getOperand(0).getOpcode();		unsigned SHOpc = Op1.getOperand(0).getOpcode();
if ((SHOpc == ISD::SHL \|\| SHOpc == ISD::SRL) && CanFoldMask &&		if ((SHOpc == ISD::SHL \|\| SHOpc == ISD::SRL) && CanFoldMask &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {		isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
// Note that Value must be in range here (less than 32) because		// Note that Value must be in range here (less than 32) because
// otherwise there would not be any bits set in InsertMask.		// otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);		Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;		SH = (SHOpc == ISD::SHL) ? Value : BitSize - Value;
		Op1Opc = Op1.getOpcode();
}		}
}		}

SH &= 31;		SH &= (BitSize - 1);

		if (BitSize == 32) {
		// We eliminate AND instructions if they are already folded into rlwimi.
		if (Op1Opc == ISD::AND && isInt32Immediate(Op1.getOperand(1), Value) &&
		(Value << SH) == InsertMask)
		Op1 = Op1.getOperand(0);
		if (Op0Opc == ISD::AND && isInt32Immediate(Op0.getOperand(1), Value) &&
		Value == ~InsertMask)
		Op0 = Op0.getOperand(0);

SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),		SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
getI32Imm(ME, dl) };		getI32Imm(ME, dl) };
ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));		ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
return true;		return true;
}		}
		else {
		// We cannot specify ME for rldimi; ~SH is used instead.
		if (ME == 63 - SH) {
		// We omit AND instructions if they are already folded into rldimi.
		uint64_t Value64 = 0;
		if (Op1Opc == ISD::AND && isInt64Immediate(Op1.getOperand(1), Value64) &&
		(Value64 << SH) == InsertMask)
		Op1 = Op1.getOperand(0);
		if (Op0Opc == ISD::AND && isInt64Immediate(Op0.getOperand(1), Value64) &&
		Value64 == ~InsertMask)
		Op0 = Op0.getOperand(0);

		SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl) };
		ReplaceNode(N, CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops));
		return true;
		}
		}
		}
}		}
return false;		return false;
}		}

// Predict the number of instructions that would be generated by calling		// Predict the number of instructions that would be generated by calling
// selectI64Imm(N).		// selectI64Imm(N).
static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {		static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
		nemanjaiUnsubmitted Done Reply Inline Actions It really seems like this is a bunch of unnecessary code duplication. The only difference in the first 70 lines of this function vs. `tryRotateThenMaskInsert32()` is replacing `32` with `64`. Since you have the bit width available from the parameter, I think you should unify these two functions into one (and if it makes more sense, split out the last few lines into a pair of small functions/lambda). nemanjai: It really seems like this is a bunch of unnecessary code duplication. The only difference in…
// Assume no remaining bits.		// Assume no remaining bits.
unsigned Remainder = 0;		unsigned Remainder = 0;
// Assume no shift required.		// Assume no shift required.
unsigned Shift = 0;		unsigned Shift = 0;

// If it can't be represented as a 32 bit value.		// If it can't be represented as a 32 bit value.
if (!isInt<32>(Imm)) {		if (!isInt<32>(Imm)) {
Shift = countTrailingZeros<uint64_t>(Imm);		Shift = countTrailingZeros<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;		int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

// If the shifted value fits 32 bits.		// If the shifted value fits 32 bits.
if (isInt<32>(ImmSh)) {		if (isInt<32>(ImmSh)) {
// Go with the shifted value.		// Go with the shifted value.
Imm = ImmSh;		Imm = ImmSh;
} else {		} else {
// Still stuck with a 64 bit value.		// Still stuck with a 64 bit value.
Remainder = Imm;		Remainder = Imm;
Shift = 32;		Shift = 32;
Imm >>= 32;		Imm >>= 32;
}		}
}		}
		nemanjaiUnsubmitted Done Reply Inline Actions Overwrapping? It seems you're checking to ensure they don't overlap (i.e. no bits in common). nemanjai: Overwrapping? It seems you're checking to ensure they don't overlap (i.e. no bits in common).
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions Fixed typo. Thanks! inouehrs: Fixed typo. Thanks!

// Intermediate operand.		// Intermediate operand.
unsigned Result = 0;		unsigned Result = 0;

		nemanjaiUnsubmitted Done Reply Inline Actions Hard to count all the F's, please use a simpler expression. Maybe `~0ULL` or even `~(TargetMask \| InsertMask) == 0ULL` if it's all of them (or something along those lines). Also, please add a comment about why this condition is being checked. nemanjai: Hard to count all the F's, please use a simpler expression. Maybe `~0ULL` or even `~(TargetMask…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions Done. inouehrs: Done.
// Handle first 32 bits.		// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;		unsigned Lo = Imm & 0xFFFF;

// Simple value.		// Simple value.
if (isInt<16>(Imm)) {		if (isInt<16>(Imm)) {
// Just the Lo bits.		// Just the Lo bits.
++Result;		++Result;
} else if (Lo) {		} else if (Lo) {
▲ Show 20 Lines • Show All 3,178 Lines • ▼ Show 20 Lines	void PPCDAGToDAGISel::Select(SDNode *N) {

// In case any misguided DAG-level optimizations form an ADD with a		// In case any misguided DAG-level optimizations form an ADD with a
// TargetConstant operand, crash here instead of miscompiling (by selecting		// TargetConstant operand, crash here instead of miscompiling (by selecting
// an r+r add instead of some kind of r+i add).		// an r+r add instead of some kind of r+i add).
if (N->getOpcode() == ISD::ADD &&		if (N->getOpcode() == ISD::ADD &&
N->getOperand(1).getOpcode() == ISD::TargetConstant)		N->getOperand(1).getOpcode() == ISD::TargetConstant)
llvm_unreachable("Invalid ADD with TargetConstant operand");		llvm_unreachable("Invalid ADD with TargetConstant operand");

// Try matching complex bit permutations before doing anything else.		// Find opportunity to use rotate left immediate then mask insert instruction
		// for a simple bitfield insert, i.e. (or (and %a, MASK) (and %b, ~MASK))
		// before tryBitPermutation, which may generate suboptimal machine IR;
		// Leave more complicated cases for tryBitPermutation.
		if (tryBitfieldInsert(N))
		nemanjaiUnsubmitted Done Reply Inline Actions Between this and the early exits in the functions that find Rotate-and-insert opportunities, it seems that the simple case is `(or (or %a, %b) (or %c, %d))` where the two operands of the outer `or` have known-zero bits in complementary locations. If that's the case, please add a comment for this and an explanation why this case is special. Furthermore (as Kit has already alluded to) it is likely that this simple case has simple handling and should get a corresponding simple function rather than passing a `bool` parameter to these functions to tell them where they're called from. nemanjai: Between this and the early exits in the functions that find Rotate-and-insert opportunities, it…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions As Kit and you suggested, I refactored the code to avoid a boolean parameter. inouehrs: As Kit and you suggested, I refactored the code to avoid a boolean parameter.
		return;

		// Try matching complex bit permutations next.
if (tryBitPermutation(N))		if (tryBitPermutation(N))
return;		return;

// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).		// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
if (tryIntCompareInGPR(N))		if (tryIntCompareInGPR(N))
return;		return;

switch (N->getOpcode()) {		switch (N->getOpcode()) {
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines	if (isInt32Immediate(N->getOperand(1), Imm) &&
return;		return;
}		}
}		}

// Other cases are autogenerated.		// Other cases are autogenerated.
break;		break;
}		}
case ISD::OR: {		case ISD::OR: {
if (N->getValueType(0) == MVT::i32)		if ((N->getValueType(0) == MVT::i32 && tryRotateThenMaskInsert<uint32_t>(N)) \|\|
if (tryBitfieldInsert(N))		(N->getValueType(0) == MVT::i64 && tryRotateThenMaskInsert<uint64_t>(N)))
return;		return;
		jtonyUnsubmitted Done Reply Inline Actions As Nemanja mentioned above in his comment at line 1190 (lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp), it may be clearer to use `uint32_t` instead of `unsigned` here, since we are using `uint64_t` for the 64-bit case. Alternatively, we could use `unsigned long long` instead of `uint64_t` to match the 32-bit type `unsigned`. Personally, I would like us to use these type pairs consistently (either `uint64_t`/`uint32_t` or `unsigned long long`/`unsigned` is fine), but it is up to you. jtony: As Nemanja mentioned above in his comment at line 1190…

int16_t Imm;		int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&		if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {		isIntS16Immediate(N->getOperand(1), Imm)) {
KnownBits LHSKnown;		KnownBits LHSKnown;
CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);		CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);

// If this is equivalent to an add, then we can fold it with the		// If this is equivalent to an add, then we can fold it with the
▲ Show 20 Lines • Show All 1,828 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/addi-offset-fold.ll

	Show All 24 Lines

	; CHECK-LABEL: @foo			; CHECK-LABEL: @foo
	; FIXME: We don't need to do these stores/loads at all.			; FIXME: We don't need to do these stores/loads at all.
	; CHECK-DAG: std 3, -24(1)			; CHECK-DAG: std 3, -24(1)
	; CHECK-DAG: stb 4, -16(1)			; CHECK-DAG: stb 4, -16(1)
	; CHECK: ori 2, 2, 0			; CHECK: ori 2, 2, 0
	; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)			; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
	; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)			; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
	; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32			; CHECK-DAG: rldimi [[REG3:[0-9]+]], [[REG1]], 32, 24
	; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]			; CHECK: rldicl 3, [[REG3]], 33, 57
	; CHECK: rldicl 3, [[REG4]], 33, 57
	; CHECK: blr			; CHECK: blr
	}			}

	attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="ppc64le" }			attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="ppc64le" }

test/CodeGen/PowerPC/bitfieldinsert.ll

This file was added.

				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s \| FileCheck %s
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s \| FileCheck %s

				; bitfieldinsert64: Test for rldimi
				; equivalent C code
				; struct s64 {
				; int a:5;
				; int b:16;
				; long c:42;
				; };
				; void bitfieldinsert64(struct s64 *p, unsigned short v) {
				; p->b = v;
				; }

				%struct.s64 = type { i64 }

				define void @bitfieldinsert64(%struct.s64* nocapture %p, i16 zeroext %v) {
				; CHECK-LABEL: @bitfieldinsert64
				; CHECK: ld [[REG1:[0-9]+]], 0(3)
				; CHECK: rldimi [[REG1]], 4, 5, 43
				; CHECK: std [[REG1]], 0(3)
				; CHECK: blr
				entry:
				%0 = getelementptr inbounds %struct.s64, %struct.s64* %p, i64 0, i32 0
				%1 = zext i16 %v to i64
				%bf.load = load i64, i64* %0, align 8
				%bf.shl = shl nuw nsw i64 %1, 5
				%bf.clear = and i64 %bf.load, -2097121
				%bf.set = or i64 %bf.clear, %bf.shl
				store i64 %bf.set, i64* %0, align 8
				ret void
				}

				; bitfieldinsert32: Test for rlwimi
				; equivalent C code
				; struct s32 {
				; int a:8;
				; int b:16;
				; int c:8;
				; };
				; void bitfieldinsert32(struct s32 *p, unsigned int v) {
				; p->b = v;
				; }

				%struct.s32 = type { i32 }

				define void @bitfieldinsert32(%struct.s32* nocapture %p, i32 zeroext %v) {
				; CHECK-LABEL: @bitfieldinsert32
				; CHECK: lwz [[REG1:[0-9]+]], 0(3)
				; CHECK: rlwimi [[REG1]], 4, 8, 8, 23
				; CHECK: stw [[REG1]], 0(3)
				; CHECK: blr
				entry:
				%0 = getelementptr inbounds %struct.s32, %struct.s32* %p, i64 0, i32 0
				%bf.load = load i32, i32* %0, align 4
				%bf.value = shl i32 %v, 8
				%bf.shl = and i32 %bf.value, 16776960
				%bf.clear = and i32 %bf.load, -16776961
				%bf.set = or i32 %bf.clear, %bf.shl
				store i32 %bf.set, i32* %0, align 4
				ret void
				}

test/CodeGen/PowerPC/ppc64le-aggregates.ll

	Show First 20 Lines • Show All 230 Lines • ▼ Show 20 Lines
	; CHECK-DAG: fmr 10, 2			; CHECK-DAG: fmr 10, 2
	; CHECK-DAG: fmr 11, 3			; CHECK-DAG: fmr 11, 3
	; CHECK-DAG: fmr 12, 4			; CHECK-DAG: fmr 12, 4
	; CHECK-DAG: fmr 13, 5			; CHECK-DAG: fmr 13, 5
	; CHECK-DAG: stfs 5, [[OFF0:[0-9]+]](1)			; CHECK-DAG: stfs 5, [[OFF0:[0-9]+]](1)
	; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)			; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
	; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)			; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
	; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)			; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
	; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)			; CHECK-DAG: lwz 9, [[OFF0]](1)
	; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)			; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
	; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)			; CHECK-DAG: lwz 10, [[OFF2]](1)
	; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)			; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
	; CHECK-DAG: sldi [[REG1]], [[REG1]], 32			; CHECK-DAG: rldimi 9, [[REG1]], 32, 0
	; CHECK-DAG: sldi [[REG3]], [[REG3]], 32			; CHECK-DAG: rldimi 10, [[REG3]], 32, 0
	; CHECK-DAG: or 9, [[REG0]], [[REG1]]
	; CHECK-DAG: or 10, [[REG2]], [[REG3]]
	; CHECK: bl test1			; CHECK: bl test1

	declare void @test1([8 x float], [8 x float])			declare void @test1([8 x float], [8 x float])

	define float @callee2([8 x float] %a, [5 x float] %b, [2 x float] %c) {			define float @callee2([8 x float] %a, [5 x float] %b, [2 x float] %c) {
	entry:			entry:
	%c.extract = extractvalue [2 x float] %c, 1			%c.extract = extractvalue [2 x float] %c, 1
	ret float %c.extract			ret float %c.extract
	▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[PPC] exploit rotate-left-then-mask-insert instructions for bitfield insert
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 133207

lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

test/CodeGen/PowerPC/addi-offset-fold.ll

test/CodeGen/PowerPC/bitfieldinsert.ll

test/CodeGen/PowerPC/ppc64le-aggregates.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PPC] exploit rotate-left-then-mask-insert instructions for bitfield insertAbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 133207

lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

test/CodeGen/PowerPC/addi-offset-fold.ll

test/CodeGen/PowerPC/bitfieldinsert.ll

test/CodeGen/PowerPC/ppc64le-aggregates.ll

[PPC] exploit rotate-left-then-mask-insert instructions for bitfield insert
AbandonedPublic