Diff 250170

llvm/lib/Target/ARM/ARMISelLowering.h

Show First 20 Lines • Show All 348 Lines • ▼ Show 20 Lines	public:
void AdjustInstrPostInstrSelection(MachineInstr &MI,		void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;		SDNode *Node) const override;

SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;		SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;		SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;		SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;		SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

		/// Target hook for demanded-bits simplification of ARM-specific nodes.
		/// The definition in ARMISelLowering.cpp handles ARMISD::ASRL and
		/// ARMISD::LSRL and defers every other case to the TargetLowering base
		/// implementation. Returns the bool produced by TLO.CombineTo when a
		/// replacement is made, otherwise whatever the base implementation returns.
		bool SimplifyDemandedBitsForTargetNode(SDValue Op,
		const APInt &OriginalDemandedBits,
		const APInt &OriginalDemandedElts,
		KnownBits &Known,
		TargetLoweringOpt &TLO,
		unsigned Depth) const override;
		samparker (inline comment; unsubmitted, not done): Doesn't this need override?

bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;		bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

/// allowsMisalignedMemoryAccesses - Returns true if the target allows		/// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it		/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.		/// is "fast" by reference in the second argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
unsigned Align,		unsigned Align,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
▲ Show 20 Lines • Show All 523 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 14,175 Lines • ▼ Show 20 Lines
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {		static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();		unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {		switch (IntNo) {
default:		default:
// Don't do anything for most intrinsics.		// Don't do anything for most intrinsics.
break;		break;

// Vector shifts: check for immediate versions and lower them.		// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because		// Note: This is done during DAG combining instead of DAG legalizing because
		samparker (inline comment; unsubmitted, not done): For readability, maybe introduce a lambda to help create the shift and do the replacement? Some aptly named variables for shift ranges and whether we're doing a logical/left/right shift could also help.
// the build_vectors for 64-bit vector element shift counts are generally		// the build_vectors for 64-bit vector element shift counts are generally
// not legal, and it is hard to see their values after they get legalized to		// not legal, and it is hard to see their values after they get legalized to
// loads from a constant pool.		// loads from a constant pool.
case Intrinsic::arm_neon_vshifts:		case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:		case Intrinsic::arm_neon_vshiftu:
case Intrinsic::arm_neon_vrshifts:		case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:		case Intrinsic::arm_neon_vrshiftu:
case Intrinsic::arm_neon_vrshiftn:		case Intrinsic::arm_neon_vrshiftn:
▲ Show 20 Lines • Show All 2,031 Lines • ▼ Show 20 Lines	ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
// We could try to recognize lsls+lsrs or lsrs+lsls pairs here.		// We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
// We could try to prefer Thumb1 immediates which can be lowered to a		// We could try to prefer Thumb1 immediates which can be lowered to a
// two-instruction sequence.		// two-instruction sequence.
// We could try to recognize more legal ARM/Thumb2 immediates here.		// We could try to recognize more legal ARM/Thumb2 immediates here.

return false;		return false;
}		}

		/// Demanded-bits simplification for ARM-specific nodes.
		///
		/// Only the long right shifts ARMISD::ASRL / ARMISD::LSRL are handled here.
		/// When result 0 is the value being simplified, result 1 has no users, the
		/// shift amount (operand 2) is a constant below 32, and every demanded bit
		/// lies in the top ShAmt bits of the 32-bit result, the node is replaced by
		/// a plain 32-bit ISD::SHL of operand 1 by (32 - ShAmt).
		/// NOTE(review): operand 1 is presumably the high half of the 64-bit
		/// input pair; confirm against the ASRL/LSRL node definition.
		///
		/// Everything else is forwarded unchanged to the TargetLowering base
		/// implementation.
		bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
		    SDValue Op, const APInt &OriginalDemandedBits,
		    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
		    unsigned Depth) const {
		  const unsigned Opcode = Op.getOpcode();
		  const bool IsLongRightShift =
		      Opcode == ARMISD::ASRL || Opcode == ARMISD::LSRL;

		  // The fold is only valid for the first result when the second result is
		  // dead and the shift amount is a known constant.
		  if (IsLongRightShift && Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
		      isa<ConstantSDNode>(Op->getOperand(2))) {
		    unsigned ShAmt = Op->getConstantOperandVal(2);
		    if (ShAmt < 32) {
		      // A right shift by ShAmt is replaced by a left shift by the
		      // complementary amount; only the top ShAmt bits of the result can be
		      // supplied that way, so all demanded bits must fall inside them.
		      const unsigned LeftShAmt = 32 - ShAmt;
		      const APInt ReplaceableBits = APInt::getAllOnesValue(32) << LeftShAmt;
		      if (OriginalDemandedBits.isSubsetOf(ReplaceableBits)) {
		        SDLoc DL(Op);
		        SDValue ShiftLeft =
		            TLO.DAG.getNode(ISD::SHL, DL, MVT::i32, Op.getOperand(1),
		                            TLO.DAG.getConstant(LeftShAmt, DL, MVT::i32));
		        return TLO.CombineTo(Op, ShiftLeft);
		      }
		    }
		  }

		  // Not a pattern handled here: fall back to the generic implementation.
		  return TargetLowering::SimplifyDemandedBitsForTargetNode(
		      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support		// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {		bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
// Looking for "rev" which is V6+.		// Looking for "rev" which is V6+.
if (!Subtarget->hasV6Ops())		if (!Subtarget->hasV6Ops())
▲ Show 20 Lines • Show All 1,683 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/fir.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-MVE			; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-MVE
	; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+dsp %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NOMVE			; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi -mattr=+dsp %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NOMVE

	define void @test1(i32* %p0, i32 %p1, i32 %p2, i32 *%pDst) {			define void @test1(i32* %p0, i32 %p1, i32 %p2, i32 *%pDst) {
	; CHECK-MVE-LABEL: test1:			; CHECK-LABEL: test1:
	; CHECK-MVE: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-MVE-NEXT: ldr r1, [r1]			; CHECK-NEXT: ldr r1, [r1]
	; CHECK-MVE-NEXT: ldr r2, [r2]			; CHECK-NEXT: ldr r2, [r2]
	; CHECK-MVE-NEXT: ldr r0, [r0]			; CHECK-NEXT: ldr r0, [r0]
	; CHECK-MVE-NEXT: smull r2, r1, r2, r1			; CHECK-NEXT: smmul r1, r2, r1
	; CHECK-MVE-NEXT: lsrl r2, r1, #31			; CHECK-NEXT: add.w r0, r0, r1, lsl #1
	; CHECK-MVE-NEXT: bic r1, r2, #1			; CHECK-NEXT: str r0, [r3]
	; CHECK-MVE-NEXT: add r0, r1			; CHECK-NEXT: bx lr
	; CHECK-MVE-NEXT: str r0, [r3]
	; CHECK-MVE-NEXT: bx lr
	;
	; CHECK-NOMVE-LABEL: test1:
	; CHECK-NOMVE: @ %bb.0: @ %entry
	; CHECK-NOMVE-NEXT: ldr r1, [r1]
	; CHECK-NOMVE-NEXT: ldr r2, [r2]
	; CHECK-NOMVE-NEXT: ldr r0, [r0]
	; CHECK-NOMVE-NEXT: smmul r1, r2, r1
	; CHECK-NOMVE-NEXT: add.w r0, r0, r1, lsl #1
	; CHECK-NOMVE-NEXT: str r0, [r3]
	; CHECK-NOMVE-NEXT: bx lr
	entry:			entry:
	%l3 = load i32, i32* %p0, align 4			%l3 = load i32, i32* %p0, align 4
	%l4 = load i32, i32* %p1, align 4			%l4 = load i32, i32* %p1, align 4
	%conv5.us = sext i32 %l4 to i64			%conv5.us = sext i32 %l4 to i64
	%l5 = load i32, i32* %p2, align 4			%l5 = load i32, i32* %p2, align 4
	%conv6.us = sext i32 %l5 to i64			%conv6.us = sext i32 %l5 to i64
	%mul.us = mul nsw i64 %conv6.us, %conv5.us			%mul.us = mul nsw i64 %conv6.us, %conv5.us
	%l6 = lshr i64 %mul.us, 31			%l6 = lshr i64 %mul.us, 31
	Show All 31 Lines

llvm/test/CodeGen/Thumb2/shift_parts.ll

	Show First 20 Lines • Show All 416 Lines • ▼ Show 20 Lines
	entry:			entry:
	%shr = shl i64 %x, 44			%shr = shl i64 %x, 44
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	ret i32 %t			ret i32 %t
	}			}


	define i32 @ashr_demand_bottommask(i64 %x) {			define i32 @ashr_demand_bottommask(i64 %x) {
	; CHECK-MVE-LABEL: ashr_demand_bottommask:			; CHECK-LABEL: ashr_demand_bottommask:
	; CHECK-MVE: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-MVE-NEXT: lsrl r0, r1, #31			; CHECK-NEXT: lsls r0, r1, #1
	; CHECK-MVE-NEXT: bic r0, r0, #1			; CHECK-NEXT: bx lr
	; CHECK-MVE-NEXT: bx lr
	;
	; CHECK-NON-MVE-LABEL: ashr_demand_bottommask:
	; CHECK-NON-MVE: @ %bb.0: @ %entry
	; CHECK-NON-MVE-NEXT: lsls r0, r1, #1
	; CHECK-NON-MVE-NEXT: bx lr
	entry:			entry:
	%shr = ashr i64 %x, 31			%shr = ashr i64 %x, 31
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	%a = and i32 %t, -2			%a = and i32 %t, -2
	ret i32 %a			ret i32 %a
	}			}

	define i32 @lshr_demand_bottommask(i64 %x) {			define i32 @lshr_demand_bottommask(i64 %x) {
	; CHECK-MVE-LABEL: lshr_demand_bottommask:			; CHECK-LABEL: lshr_demand_bottommask:
	; CHECK-MVE: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-MVE-NEXT: lsrl r0, r1, #31			; CHECK-NEXT: lsls r0, r1, #1
	; CHECK-MVE-NEXT: bic r0, r0, #1			; CHECK-NEXT: bx lr
	; CHECK-MVE-NEXT: bx lr
	;
	; CHECK-NON-MVE-LABEL: lshr_demand_bottommask:
	; CHECK-NON-MVE: @ %bb.0: @ %entry
	; CHECK-NON-MVE-NEXT: lsls r0, r1, #1
	; CHECK-NON-MVE-NEXT: bx lr
	entry:			entry:
	%shr = lshr i64 %x, 31			%shr = lshr i64 %x, 31
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	%a = and i32 %t, -2			%a = and i32 %t, -2
	ret i32 %a			ret i32 %a
	}			}

	define i32 @lsl_demand_bottommask(i64 %x) {			define i32 @lsl_demand_bottommask(i64 %x) {
	; CHECK-LABEL: lsl_demand_bottommask:			; CHECK-LABEL: lsl_demand_bottommask:
	; CHECK: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: lsls r0, r0, #31			; CHECK-NEXT: lsls r0, r0, #31
	; CHECK-NEXT: bx lr			; CHECK-NEXT: bx lr
	entry:			entry:
	%shr = shl i64 %x, 31			%shr = shl i64 %x, 31
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	%a = and i32 %t, -2			%a = and i32 %t, -2
	ret i32 %a			ret i32 %a
	}			}

	define i32 @ashr_demand_bottommask2(i64 %x) {			define i32 @ashr_demand_bottommask2(i64 %x) {
	; CHECK-MVE-LABEL: ashr_demand_bottommask2:			; CHECK-LABEL: ashr_demand_bottommask2:
	; CHECK-MVE: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-MVE-NEXT: lsrl r0, r1, #31			; CHECK-NEXT: mvn r0, #2
	; CHECK-MVE-NEXT: bic r0, r0, #3			; CHECK-NEXT: and.w r0, r0, r1, lsl #1
	; CHECK-MVE-NEXT: bx lr			; CHECK-NEXT: bx lr
	;
	; CHECK-NON-MVE-LABEL: ashr_demand_bottommask2:
	; CHECK-NON-MVE: @ %bb.0: @ %entry
	; CHECK-NON-MVE-NEXT: mvn r0, #2
	; CHECK-NON-MVE-NEXT: and.w r0, r0, r1, lsl #1
	; CHECK-NON-MVE-NEXT: bx lr
	entry:			entry:
	%shr = ashr i64 %x, 31			%shr = ashr i64 %x, 31
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	%a = and i32 %t, -4			%a = and i32 %t, -4
	ret i32 %a			ret i32 %a
	}			}

	define i32 @lshr_demand_bottommask2(i64 %x) {			define i32 @lshr_demand_bottommask2(i64 %x) {
	; CHECK-MVE-LABEL: lshr_demand_bottommask2:			; CHECK-LABEL: lshr_demand_bottommask2:
	; CHECK-MVE: @ %bb.0: @ %entry			; CHECK: @ %bb.0: @ %entry
	; CHECK-MVE-NEXT: lsrl r0, r1, #31			; CHECK-NEXT: mvn r0, #2
	; CHECK-MVE-NEXT: bic r0, r0, #3			; CHECK-NEXT: and.w r0, r0, r1, lsl #1
	; CHECK-MVE-NEXT: bx lr			; CHECK-NEXT: bx lr
	;
	; CHECK-NON-MVE-LABEL: lshr_demand_bottommask2:
	; CHECK-NON-MVE: @ %bb.0: @ %entry
	; CHECK-NON-MVE-NEXT: mvn r0, #2
	; CHECK-NON-MVE-NEXT: and.w r0, r0, r1, lsl #1
	; CHECK-NON-MVE-NEXT: bx lr
	entry:			entry:
	%shr = lshr i64 %x, 31			%shr = lshr i64 %x, 31
	%t = trunc i64 %shr to i32			%t = trunc i64 %shr to i32
	%a = and i32 %t, -4			%a = and i32 %t, -4
	ret i32 %a			ret i32 %a
	}			}

	define i32 @lsl_demand_bottommask2(i64 %x) {			define i32 @lsl_demand_bottommask2(i64 %x) {
	Show All 23 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Optimise ASRL/LSRL to smaller shifts using demand bits.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 250170

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/test/CodeGen/Thumb2/fir.ll

llvm/test/CodeGen/Thumb2/shift_parts.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Optimise ASRL/LSRL to smaller shifts using demand bits. (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 250170

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/test/CodeGen/Thumb2/fir.ll

llvm/test/CodeGen/Thumb2/shift_parts.ll

[ARM] Optimise ASRL/LSRL to smaller shifts using demand bits.
ClosedPublic