Diff 121270

llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,949 Lines • ▼ Show 20 Lines	static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,

// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))		// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getNode()->hasOneUse())		if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))		if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
return Result;		return Result;
return SDValue();		return SDValue();
}		}

		/// PerformSHLSimplify - Undo the generic combiner's
		/// (shl (op x, c1), c2) -> (op (shl x, c2), c1 << c2) fold when every user of
		/// N is able to absorb the shift as a shifted-register operand.
		///
		/// \p N is expected to be an ADD, OR, XOR or AND whose first operand is a SHL
		/// by a constant and whose second operand is a constant. On success every use
		/// of N is replaced with (shl (op x, c1 >> c2), c2) and N itself is returned;
		/// in every other case an empty SDValue is returned and nothing is changed.
		static SDValue PerformSHLSimplify(SDNode *N,
		                                  TargetLowering::DAGCombinerInfo &DCI,
		                                  const ARMSubtarget *ST) {
		  // Allow the generic combiner to identify potential bswaps.
		  if (DCI.isBeforeLegalize())
		    return SDValue();

		  // DAG combiner will fold:
		  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
		  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
		  // Other code patterns that can also be modified have the following form:
		  // b + ((a << 1) | 510)
		  // b + ((a << 1) & 510)
		  // b + ((a << 1) ^ 510)
		  // b + ((a << 1) + 510)

		  // Many instructions can perform the shift for free, but that requires both
		  // operands to be registers. If c1 << c2 is too large, a mov immediate
		  // instruction will be needed. So, unfold back to the original pattern if:
		  // - c1 and c2 are small enough that they don't require mov imms.
		  // - the user(s) of the node can perform an shl.

		  // No shifted operands for 16-bit instructions.
		  if (ST->isThumb() && ST->isThumb1Only())
		    return SDValue();

		  // Check that all the users could perform the shl themselves.
		  for (auto U : N->uses()) {
		    switch (U->getOpcode()) {
		    default:
		      return SDValue();
		    case ISD::SUB:
		    case ISD::ADD:
		    case ISD::AND:
		    case ISD::OR:
		    case ISD::XOR:
		    case ISD::SETCC:
		    case ARMISD::CMP:
		      // Check that the user isn't already using a constant: there aren't any
		      // instructions that take both an immediate operand and a shifted
		      // operand, so the unfolded shl could not be absorbed by this user.
		      if (dyn_cast<ConstantSDNode>(U->getOperand(0)) ||
		          dyn_cast<ConstantSDNode>(U->getOperand(1)))
		        return SDValue();

		      // Check that it's not already using a shl.
		      if (U->getOperand(0).getOpcode() == ISD::SHL ||
		          U->getOperand(1).getOpcode() == ISD::SHL)
		        return SDValue();
		      break;
		    }
		  }

		  if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
		      N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
		    return SDValue();

		  if (N->getOperand(0).getOpcode() != ISD::SHL)
		    return SDValue();

		  SDValue SHL = N->getOperand(0);

		  // The constant on N is the already-shifted value (c1 << c2); the constant on
		  // the SHL is the shift amount c2.
		  auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
		  auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
		  if (!C1ShlC2 || !C2)
		    return SDValue();

		  DEBUG(dbgs() << "Trying to simplify shl: "; N->dump());

		  APInt C2Int = C2->getAPIntValue();
		  APInt C1Int = C1ShlC2->getAPIntValue();

		  // A shift amount of at least the bit width would make the unsigned
		  // subtraction used to build the mask below wrap around; there is nothing
		  // sensible to unfold in that case.
		  if (C2->getZExtValue() >= C2Int.getBitWidth())
		    return SDValue();

		  // Check that performing a lshr will not lose any information, i.e. that the
		  // low c2 bits of the constant are all zero.
		  APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
		                                     C2Int.getBitWidth() - C2->getZExtValue());
		  if ((C1Int & Mask) != C1Int)
		    return SDValue();

		  // Shift the first constant.
		  C1Int.lshrInPlace(C2Int);

		  // The immediates are encoded as an 8-bit value that can be rotated, so the
		  // span between the lowest and highest set bit must fit in 8 bits.
		  // NOTE(review): for a zero immediate the zero count exceeds the bit width,
		  // the unsigned difference wraps and the value is reported as "large" -
		  // same as the original open-coded checks.
		  auto IsLargeImm = [](const APInt &Imm) {
		    unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
		    return Imm.getBitWidth() - Zeros > 8;
		  };

		  if (IsLargeImm(C1Int) || IsLargeImm(C2Int))
		    return SDValue();

		  SelectionDAG &DAG = DCI.DAG;
		  SDLoc dl(N);
		  SDValue X = SHL.getOperand(0);
		  SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
		                              DAG.getConstant(C1Int, dl, MVT::i32));
		  // Shift left to compensate for the lshr of C1Int.
		  SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));

		  // The uses are rewritten here, so hand N back to tell the combiner that the
		  // replacement has already been carried out.
		  DAG.ReplaceAllUsesWith(SDValue(N, 0), Res);
		  return SDValue(N, 0);
		}


/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.		/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///		///
/// Attempts, in order (right-hand/new column of this diff):
///   1. PerformSHLSimplify on N,
///   2. PerformADDCombineWithOperands with the operands as written (N0, N1),
///   3. PerformADDCombineWithOperands with the operands swapped (N1, N0).
/// The first non-empty result is returned; otherwise the result of the
/// commuted attempt is passed through unchanged.
static SDValue PerformADDCombine(SDNode *N,		static SDValue PerformADDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {		const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);

		// PerformSHLSimplify only inspects N's operands in their existing order
		// (operand 0 must be the shl, operand 1 the constant), hence a single call.
		// Only works one way, because it needs an immediate operand.
		if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
		return Result;

// First try with the default operand order.		// First try with the default operand order.
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))		if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;		return Result;

// If that didn't work, try again with the operands commuted.		// If that didn't work, try again with the operands commuted.
return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);		return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}		}

▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines	if (SplatBitSize <= 64) {
}		}
}		}
}		}

if (!Subtarget->isThumb1Only()) {		if (!Subtarget->isThumb1Only()) {
// fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))		// fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))		if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;		return Result;

		if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
		return Result;
}		}

return SDValue();		return SDValue();
}		}

// Try combining OR nodes to SMULWB, SMULWT.		// Try combining OR nodes to SMULWB, SMULWT.
static SDValue PerformORCombineToSMULWBT(SDNode *OR,		static SDValue PerformORCombineToSMULWBT(SDNode *OR,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
▲ Show 20 Lines • Show All 217 Lines • ▼ Show 20 Lines	static SDValue PerformORCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {		if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))		// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))		if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;		return Result;
if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))		if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
return Result;		return Result;
}		}

// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);

// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.		// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&		if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {		DAG.getTargetLoweringInfo().isTypeLegal(VT)) {

		// The code below optimizes (or (and X, Y), Z).
		// The AND operand needs to have a single user to make these optimizations
		// profitable.
		if (N0.getOpcode() != ISD::AND \|\| !N0.hasOneUse())
		return SDValue();

APInt SplatUndef;		APInt SplatUndef;
unsigned SplatBitSize;		unsigned SplatBitSize;
bool HasAnyUndefs;		bool HasAnyUndefs;

APInt SplatBits0, SplatBits1;		APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));		BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));		BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
// Ensure that the second operand of both ands are constants		// Ensure that the second operand of both ands are constants
Show All 16 Lines	if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
return DAG.getNode(ISD::BITCAST, dl, VT, Result);		return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}		}
}		}
}		}
}		}

// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when		// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.		// reasonable.
		if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))		if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
return Res;		return Res;
		}

		if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
		return Result;

return SDValue();		return SDValue();
}		}

/// PerformXORCombine - Target-specific DAG combine for the xor node N.
/// Returns an empty SDValue when N's result type is not legal. Otherwise,
/// unless the subtarget is Thumb1-only, it tries
/// combineSelectAndUseCommutative and then (new column of this diff)
/// PerformSHLSimplify, returning the first non-empty result.
static SDValue PerformXORCombine(SDNode *N,		static SDValue PerformXORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {		const ARMSubtarget *Subtarget) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;

// Nothing is attempted on types the target does not consider legal.
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))		if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();		return SDValue();

if (!Subtarget->isThumb1Only()) {		if (!Subtarget->isThumb1Only()) {
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))		// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))		if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;		return Result;

		// Try to move a constant xor below a constant shl so that N's users
		// can take the shift as part of their own operand.
		if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
		return Result;
}		}

return SDValue();		return SDValue();
}		}

// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,		// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and		// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
// their position in "to" (Rd).		// their position in "to" (Rd).
▲ Show 20 Lines • Show All 3,682 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/ARM/unfold-shifts.ll

				; RUN: llc -mtriple armv6t2 %s -o - | FileCheck %s
				; RUN: llc -mtriple thumbv6t2 %s -o - | FileCheck %s --check-prefix=CHECK-T2
				; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
				; RUN: llc -mtriple thumbv7 %s -o - | FileCheck %s --check-prefix=CHECK-T2
				; RUN: llc -mtriple thumbv7m %s -o - | FileCheck %s --check-prefix=CHECK-T2
				; RUN: llc -mtriple thumbv8m.main %s -o - | FileCheck %s --check-prefix=CHECK-T2

				; unfold1: b + ((a << 1) | 510). Since 510 == 255 << 1, the expected code
				; applies the or to the unshifted value (#255) and folds the shift into the
				; add (lsl #1), without a separate mov of the constant.
				; Note the IR value names are swapped: %or holds the shl, %shl holds the or.
				; CHECK-LABEL: unfold1
				; CHECK-NOT: mov
				; CHECK: orr r0, r0, #255
				; CHECK: add r0, r1, r0, lsl #1
				; CHECK-T2-NOT: mov
				; CHECK-T2: orr r0, r0, #255
				; CHECK-T2: add.w r0, r1, r0, lsl #1
				define arm_aapcscc i32 @unfold1(i32 %a, i32 %b) {
				entry:
				%or = shl i32 %a, 1
				%shl = or i32 %or, 510
				%add = add nsw i32 %shl, %b
				ret i32 %add
				}

				; unfold2: b - ((a << 2) | 16320). Since 16320 == 4080 << 2, the expected
				; code ors the unshifted value with #4080 and folds the shift into the sub
				; (lsl #2). %or holds the shl and %shl holds the or.
				; CHECK-LABEL: unfold2
				; CHECK-NOT: mov
				; CHECK: orr r0, r0, #4080
				; CHECK: sub r0, r1, r0, lsl #2
				; CHECK-T2-NOT: mov
				; CHECK-T2: orr r0, r0, #4080
				; CHECK-T2: sub.w r0, r1, r0, lsl #2
				define arm_aapcscc i32 @unfold2(i32 %a, i32 %b) {
				entry:
				%or = shl i32 %a, 2
				%shl = or i32 %or, 16320
				%sub = sub nsw i32 %b, %shl
				ret i32 %sub
				}

				; unfold3: b & ((a << 4) | 1044480). Since 1044480 == 65280 << 4, the
				; expected code ors the unshifted value with #65280 and folds the shift into
				; the and (lsl #4). %or holds the shl and %shl holds the or.
				; CHECK-LABEL: unfold3
				; CHECK-NOT: mov
				; CHECK: orr r0, r0, #65280
				; CHECK: and r0, r1, r0, lsl #4
				; CHECK-T2-NOT: mov
				; CHECK-T2: orr r0, r0, #65280
				; CHECK-T2: and.w r0, r1, r0, lsl #4
				define arm_aapcscc i32 @unfold3(i32 %a, i32 %b) {
				entry:
				%or = shl i32 %a, 4
				%shl = or i32 %or, 1044480
				%and = and i32 %shl, %b
				ret i32 %and
				}

				; unfold4: b ^ ((a << 5) | 33423360). Since 33423360 == 1044480 << 5, the
				; expected code ors the unshifted value with #1044480 and folds the shift
				; into the eor (lsl #5). %or holds the shl and %shl holds the or.
				; CHECK-LABEL: unfold4
				; CHECK-NOT: mov
				; CHECK: orr r0, r0, #1044480
				; CHECK: eor r0, r1, r0, lsl #5
				; CHECK-T2-NOT: mov
				; CHECK-T2: orr r0, r0, #1044480
				; CHECK-T2: eor.w r0, r1, r0, lsl #5
				define arm_aapcscc i32 @unfold4(i32 %a, i32 %b) {
				entry:
				%or = shl i32 %a, 5
				%shl = or i32 %or, 33423360
				%xor = xor i32 %shl, %b
				ret i32 %xor
				}

				; unfold5: b | ((a << 6) + 31744). Since 31744 == 496 << 6, the expected
				; code adds #496 to the unshifted value and folds the shift into the orr
				; (lsl #6), without a separate mov of the constant.
				; %add holds the shl and %shl holds the add.
				; CHECK-LABEL: unfold5
				; CHECK-NOT: mov
				; CHECK: add r0, r0, #496
				; CHECK: orr r0, r1, r0, lsl #6
				; CHECK-T2-NOT: mov
				; CHECK-T2: add.w r0, r0, #496
				; CHECK-T2: orr.w r0, r1, r0, lsl #6
				define arm_aapcscc i32 @unfold5(i32 %a, i32 %b) {
				entry:
				%add = shl i32 %a, 6
				%shl = add i32 %add, 31744
				%or = or i32 %shl, %b
				ret i32 %or
				}

				; unfold6: b & ((a << 8) + 2031616). Since 2031616 == 7936 << 8, the
				; expected code adds #7936 to the unshifted value and folds the shift into
				; the and (lsl #8). %add holds the shl and %shl holds the add.
				; CHECK-LABEL: unfold6
				; CHECK-NOT: mov
				; CHECK: add r0, r0, #7936
				; CHECK: and r0, r1, r0, lsl #8
				; CHECK-T2-NOT: mov
				; CHECK-T2: add.w r0, r0, #7936
				; CHECK-T2: and.w r0, r1, r0, lsl #8
				define arm_aapcscc i32 @unfold6(i32 %a, i32 %b) {
				entry:
				%add = shl i32 %a, 8
				%shl = add i32 %add, 2031616
				%and = and i32 %shl, %b
				ret i32 %and
				}

				; unfold7: b + ((a << 1) & 512). Since 512 == 256 << 1, the expected code
				; masks the unshifted value with #256 and folds the shift into the add
				; (lsl #1).
				; CHECK-LABEL: unfold7
				; CHECK-NOT: mov
				; CHECK: and r0, r0, #256
				; CHECK: add r0, r1, r0, lsl #1
				; CHECK-T2-NOT: mov
				; CHECK-T2: and r0, r0, #256
				; CHECK-T2: add.w r0, r1, r0, lsl #1
				define arm_aapcscc i32 @unfold7(i32 %a, i32 %b) {
				entry:
				%shl = shl i32 %a, 1
				%and = and i32 %shl, 512
				%add = add nsw i32 %and, %b
				ret i32 %add
				}

				; unfold8: b ^ ((a << 9) + 65011712). Since 65011712 == 126976 << 9, the
				; expected code adds #126976 to the unshifted value and folds the shift into
				; the eor (lsl #9). %add holds the shl and %shl holds the add.
				; CHECK-LABEL: unfold8
				; CHECK-NOT: mov
				; CHECK: add r0, r0, #126976
				; CHECK: eor r0, r1, r0, lsl #9
				; CHECK-T2-NOT: mov
				; CHECK-T2: add.w r0, r0, #126976
				; CHECK-T2: eor.w r0, r1, r0, lsl #9
				define arm_aapcscc i32 @unfold8(i32 %a, i32 %b) {
				entry:
				%add = shl i32 %a, 9
				%shl = add i32 %add, 65011712
				%xor = xor i32 %shl, %b
				ret i32 %xor
				}

				; unfold9: b + ((a << 1) ^ 510). Since 510 == 255 << 1, the expected code
				; xors the unshifted value with #255 and folds the shift into the add
				; (lsl #1).
				; CHECK-LABEL: unfold9
				; CHECK-NOT: mov
				; CHECK: eor r0, r0, #255
				; CHECK: add r0, r1, r0, lsl #1
				; CHECK-T2-NOT: mov
				; CHECK-T2: eor r0, r0, #255
				; CHECK-T2: add.w r0, r1, r0, lsl #1
				define arm_aapcscc i32 @unfold9(i32 %a, i32 %b) {
				entry:
				%shl = shl i32 %a, 1
				%xor = xor i32 %shl, 510
				%add = add nsw i32 %xor, %b
				ret i32 %add
				}

				; unfold10: signed compare of ((a << 10) | 4177920) against b. Since
				; 4177920 == 4080 << 10, the expected code ors the unshifted value with
				; #4080 into r2 and folds the shift into the cmp (lsl #10). Only a mov
				; into r2 is rejected here, unlike the blanket "no mov" of the arithmetic
				; tests. %or holds the shl and %shl holds the or.
				; CHECK-LABEL: unfold10
				; CHECK-NOT: mov r2
				; CHECK: orr r2, r0, #4080
				; CHECK: cmp r1, r2, lsl #10
				; CHECK-T2-NOT: mov.w r2
				; CHECK-T2: orr r2, r0, #4080
				; CHECK-T2: cmp.w r1, r2, lsl #10
				define arm_aapcscc i32 @unfold10(i32 %a, i32 %b) {
				entry:
				%or = shl i32 %a, 10
				%shl = or i32 %or, 4177920
				%cmp = icmp sgt i32 %shl, %b
				%conv = zext i1 %cmp to i32
				ret i32 %conv
				}

				; unfold11: signed compare of ((a << 11) + 16252928) against b. Since
				; 16252928 == 7936 << 11, the expected code adds #7936 to the unshifted
				; value into r2 and folds the shift into the cmp (lsl #11). Only a mov into
				; r2 is rejected here. %add holds the shl and %shl holds the add.
				; CHECK-LABEL: unfold11
				; CHECK-NOT: mov r2
				; CHECK: add r2, r0, #7936
				; CHECK: cmp r1, r2, lsl #11
				; CHECK-T2-NOT: mov.w r2
				; CHECK-T2: add.w r2, r0, #7936
				; CHECK-T2: cmp.w r1, r2, lsl #11
				define arm_aapcscc i32 @unfold11(i32 %a, i32 %b) {
				entry:
				%add = shl i32 %a, 11
				%shl = add i32 %add, 16252928
				%cmp = icmp sgt i32 %shl, %b
				%conv = zext i1 %cmp to i32
				ret i32 %conv
				}

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] add, or, and and xor with shl combining
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 121270

llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp

llvm/trunk/test/CodeGen/ARM/unfold-shifts.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] add, or, and and xor with shl combiningClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 121270

llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp

llvm/trunk/test/CodeGen/ARM/unfold-shifts.ll

[ARM] add, or, and and xor with shl combining
ClosedPublic