This is an archive of the discontinued LLVM Phabricator instance.

Differential D120933

[SDAG] match rotate pattern with extra 'or' operation
ClosedPublic

Authored by spatel on Mar 3 2022, 12:28 PM.

Download Raw Diff

Details

Reviewers

fzhinkin
RKSimon
dmgreen

Commits

rG341623653d89: [SDAG] match rotate pattern with extra 'or' operation

Summary

This is another fold generalized from D111530. We can find a common source for a rotate operation hidden inside an 'or':
https://alive2.llvm.org/ce/z/9pV8hn

Deciding when this is profitable vs. a funnel-shift is tricky, but this does not show any regressions: if a target has a rotate but does not have a funnel-shift, then we try to form the rotate here. That is why there are no x86 test diffs for the scalar tests that are duplicated from AArch64 (74a65e3834d9487): x86 has funnel-shift instructions (shld/shrd), so the new fold does not fire there. That also makes it difficult to show vector diffs - the only case where I found a diff was on x86 AVX512 or XOP with i64 elements.

There's an additional check for a legal type to avoid a problem seen with x86-32 where we form a 64-bit rotate but then it gets split inefficiently. We might avoid that by adding more rotate folds, but I didn't check to see what is missing on that path.

This gets most of the motivating patterns for AArch64 / ARM that are in D111530. We still need a couple of enhancements to setcc pattern matching with rotate/funnel-shift to get the rest.

Diff Detail

Unit Tests: Failed

	Time	Test
	61,040 ms	x64 debian > Clang.CodeGen/RISCV/rvv-intrinsics::vloxseg.c
	60,740 ms	x64 debian > Clang.CodeGen/RISCV/rvv-intrinsics::vlseg.c
	60,770 ms	x64 debian > Clang.CodeGen/RISCV/rvv-intrinsics::vlsegff.c
	60,820 ms	x64 debian > Clang.CodeGen/RISCV/rvv-intrinsics::vluxseg.c
	60,770 ms	x64 debian > Clang.CodeGen/RISCV/rvv-intrinsics::vsoxseg.c
		View Full Test Results (20 Failed)

Event Timeline

spatel created this revision. Mar 3 2022, 12:28 PM

Herald added a project: Restricted Project. · View Herald Transcript · Mar 3 2022, 12:28 PM

Herald added subscribers: ecnelises, pengfei, hiraditya and 2 others. · View Herald Transcript

spatel requested review of this revision. Mar 3 2022, 12:28 PM

Herald added a project: Restricted Project. · View Herald Transcript · Mar 3 2022, 12:28 PM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B152444: Diff 412794. Mar 3 2022, 1:21 PM

fzhinkin accepted this revision. Mar 9 2022, 6:19 AM

This revision is now accepted and ready to land. Mar 9 2022, 6:19 AM

This revision was landed with ongoing or failed builds. Mar 9 2022, 10:19 AM

Closed by commit rG341623653d89: [SDAG] match rotate pattern with extra 'or' operation (authored by spatel). · Explain Why

This revision was automatically updated to reflect the committed changes.

spatel added a commit: rG341623653d89: [SDAG] match rotate pattern with extra 'or' operation.

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

55 lines

test/

CodeGen/

AArch64/

icmp-shift-opt.ll

18 lines

logic-shift.ll

16 lines

ARM/

consthoist-icmpimm.ll

24 lines

icmp-shift-opt.ll

24 lines

X86/

rotate_vec.ll

20 lines

Diff 412794

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,393 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (!RHSShift \|\| !LHSShift)		if (!RHSShift \|\| !LHSShift)
return SDValue();		return SDValue();

// At this point we've matched or extracted a shift op on each side.		// At this point we've matched or extracted a shift op on each side.

if (LHSShift.getOpcode() == RHSShift.getOpcode())		if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.		return SDValue(); // Shifts must disagree.

// TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL \|\| HasFSHR))
return SDValue(); // Requires funnel shift support.

// Canonicalize shl to left side in a shl/srl pair.		// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {		if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);		std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);		std::swap(LHSShift, RHSShift);
std::swap(LHSMask, RHSMask);		std::swap(LHSMask, RHSMask);
}		}

unsigned EltSizeInBits = VT.getScalarSizeInBits();		unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);		SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);		SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);		SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);		SDValue RHSShiftAmt = RHSShift.getOperand(1);

		auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
		ConstantSDNode *RHS) {
		return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
		};

		// TODO: Support pre-legalization funnel-shift by constant.
		bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
		if (!IsRotate && !(HasFSHL \|\| HasFSHR)) {
		if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
		ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
		// Look for a disguised rotate by constant.
		// The common shifted operand X may be hidden inside another 'or'.
		SDValue X, Y;
		auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
		if (!Or.hasOneUse() \|\| Or.getOpcode() != ISD::OR)
		return false;
		if (CommonOp == Or.getOperand(0)) {
		X = CommonOp;
		Y = Or.getOperand(1);
		return true;
		}
		if (CommonOp == Or.getOperand(1)) {
		X = CommonOp;
		Y = Or.getOperand(0);
		return true;
		}
		return false;
		};

		// (shl (X \| Y), C1) \| (srl X, C2) --> (rotl X, C1) \| (shl Y, C1)
		if (matchOr(LHSShiftArg, RHSShiftArg)) {
		SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
		SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
		return DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
		}
		// (shl X, C1) \| (srl (X \| Y), C2) --> (rotl X, C1) \| (srl Y, C2)
		if (matchOr(RHSShiftArg, LHSShiftArg)) {
		SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
		SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
		return DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
		}
		}

		return SDValue(); // Requires funnel shift support.
		}

// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)		// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)		// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)		// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)		// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
// iff C1+C2 == EltSizeInBits		// iff C1+C2 == EltSizeInBits
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {		if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;		SDValue Res;
if (IsRotate && (HasROTL \|\| HasROTR \|\| !(HasFSHL \|\| HasFSHR))) {		if (IsRotate && (HasROTL \|\| HasROTR \|\| !(HasFSHL \|\| HasFSHR))) {
bool UseROTL = !LegalOperations \|\| HasROTL;		bool UseROTL = !LegalOperations \|\| HasROTL;
Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,		Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
UseROTL ? LHSShiftAmt : RHSShiftAmt);		UseROTL ? LHSShiftAmt : RHSShiftAmt);
} else {		} else {
bool UseFSHL = !LegalOperations \|\| HasFSHL;		bool UseFSHL = !LegalOperations \|\| HasFSHL;
▲ Show 20 Lines • Show All 16,965 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/icmp-shift-opt.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi \| FileCheck %s		; RUN: llc < %s -mtriple=arm64-eabi \| FileCheck %s

; Optimize expanded SRL/SHL used as an input of		; Optimize expanded SRL/SHL used as an input of
; SETCC comparing it with zero by removing rotation.		; SETCC comparing it with zero by removing rotation.
;		;
; See https://bugs.llvm.org/show_bug.cgi?id=50197		; See https://bugs.llvm.org/show_bug.cgi?id=50197
define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {		define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_lt_power_of_2:		; CHECK-LABEL: opt_setcc_lt_power_of_2:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: .LBB0_1: // %loop		; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1		; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds x0, x0, #1		; CHECK-NEXT: adds x0, x0, #1
; CHECK-NEXT: adcs x1, x1, xzr		; CHECK-NEXT: adcs x1, x1, xzr
; CHECK-NEXT: orr x8, x0, x1		; CHECK-NEXT: orr x8, x1, x0, lsr #60
; CHECK-NEXT: extr x8, x1, x8, #60
; CHECK-NEXT: cbnz x8, .LBB0_1		; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit		; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret		; CHECK-NEXT: ret
br label %loop		br label %loop

loop:		loop:
%phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]		%phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
%inc = add i128 %phi.a, 1		%inc = add i128 %phi.a, 1
%cmp = icmp ult i128 %inc, 1152921504606846976		%cmp = icmp ult i128 %inc, 1152921504606846976
br i1 %cmp, label %exit, label %loop		br i1 %cmp, label %exit, label %loop

exit:		exit:
ret i128 %inc		ret i128 %inc
}		}

define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {		define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:		; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1		; CHECK-NEXT: orr x8, x1, x0, lsr #17
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0		; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq		; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%srl = lshr i128 %a, 17		%srl = lshr i128 %a, 17
%cmp = icmp eq i128 %srl, 0		%cmp = icmp eq i128 %srl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {		define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:		; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1		; CHECK-NEXT: orr x8, x1, x0, lsr #17
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0		; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne		; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%srl = lshr i128 %a, 17		%srl = lshr i128 %a, 17
%cmp = icmp ne i128 %srl, 0		%cmp = icmp ne i128 %srl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {		define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:		; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x1, x0		; CHECK-NEXT: orr x8, x0, x1, lsl #17
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0		; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq		; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%shl = shl i128 %a, 17		%shl = shl i128 %a, 17
%cmp = icmp eq i128 %shl, 0		%cmp = icmp eq i128 %shl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {		define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:		; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x1, x0		; CHECK-NEXT: orr x8, x0, x1, lsl #17
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0		; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne		; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%shl = shl i128 %a, 17		%shl = shl i128 %a, 17
%cmp = icmp ne i128 %shl, 0		%cmp = icmp ne i128 %shl, 0
ret i1 %cmp		ret i1 %cmp
}		}

Show All 17 Lines	; CHECK-NEXT: ret
ret i1 %cmp		ret i1 %cmp
}		}

; Check that optimization is applied to DAG having appropriate shape		; Check that optimization is applied to DAG having appropriate shape
; even if there were no actual shift's expansion.		; even if there were no actual shift's expansion.
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {		define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:		; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1		; CHECK-NEXT: orr x8, x1, x0, lsl #17
; CHECK-NEXT: extr x8, x8, x1, #47
; CHECK-NEXT: cmp x8, #0		; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq		; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%shl.a = shl i64 %a, 17		%shl.a = shl i64 %a, 17
%srl.b = lshr i64 %b, 47		%srl.b = lshr i64 %b, 47
%or.0 = or i64 %shl.a, %srl.b		%or.0 = or i64 %shl.a, %srl.b
%shl.b = shl i64 %b, 17		%shl.b = shl i64 %b, 17
%or.1 = or i64 %or.0, %shl.b		%or.1 = or i64 %or.0, %shl.b
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/logic-shift.ll

Show First 20 Lines • Show All 228 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret i64 %r		ret i64 %r
}		}

; (shl (X \| Y), C1) \| (srl X, C2) --> (rotl X, C1) \| (shl Y, C1)		; (shl (X \| Y), C1) \| (srl X, C2) --> (rotl X, C1) \| (shl Y, C1)

define i32 @or_fshl_commute0(i32 %x, i32 %y) {		define i32 @or_fshl_commute0(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshl_commute0:		; CHECK-LABEL: or_fshl_commute0:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1		; CHECK-NEXT: ror w8, w0, #27
; CHECK-NEXT: extr w0, w8, w0, #27		; CHECK-NEXT: orr w0, w8, w1, lsl #5
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%or1 = or i32 %x, %y		%or1 = or i32 %x, %y
%sh1 = shl i32 %or1, 5		%sh1 = shl i32 %or1, 5
%sh2 = lshr i32 %x, 27		%sh2 = lshr i32 %x, 27
%r = or i32 %sh1, %sh2		%r = or i32 %sh1, %sh2
ret i32 %r		ret i32 %r
}		}

define i64 @or_fshl_commute1(i64 %x, i64 %y) {		define i64 @or_fshl_commute1(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshl_commute1:		; CHECK-LABEL: or_fshl_commute1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w1, w0		; CHECK-NEXT: ror x8, x0, #29
; CHECK-NEXT: extr x0, x8, x0, #29		; CHECK-NEXT: orr x0, x8, x1, lsl #35
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%or1 = or i64 %y, %x		%or1 = or i64 %y, %x
%sh1 = shl i64 %or1, 35		%sh1 = shl i64 %or1, 35
%sh2 = lshr i64 %x, 29		%sh2 = lshr i64 %x, 29
%r = or i64 %sh1, %sh2		%r = or i64 %sh1, %sh2
ret i64 %r		ret i64 %r
}		}

▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret i32 %r		ret i32 %r
}		}

; (shl X, C1) \| (srl (X \| Y), C2) --> (rotl X, C1) \| (srl Y, C2)		; (shl X, C1) \| (srl (X \| Y), C2) --> (rotl X, C1) \| (srl Y, C2)

define i64 @or_fshr_commute0(i64 %x, i64 %y) {		define i64 @or_fshr_commute0(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshr_commute0:		; CHECK-LABEL: or_fshr_commute0:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1		; CHECK-NEXT: ror x8, x0, #24
; CHECK-NEXT: extr x0, x0, x8, #24		; CHECK-NEXT: orr x0, x8, x1, lsr #24
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%or1 = or i64 %x, %y		%or1 = or i64 %x, %y
%sh1 = shl i64 %x, 40		%sh1 = shl i64 %x, 40
%sh2 = lshr i64 %or1, 24		%sh2 = lshr i64 %or1, 24
%r = or i64 %sh1, %sh2		%r = or i64 %sh1, %sh2
ret i64 %r		ret i64 %r
}		}

define i32 @or_fshr_commute1(i32 %x, i32 %y) {		define i32 @or_fshr_commute1(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshr_commute1:		; CHECK-LABEL: or_fshr_commute1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w1, w0		; CHECK-NEXT: ror w8, w0, #29
; CHECK-NEXT: extr w0, w0, w8, #29		; CHECK-NEXT: orr w0, w8, w1, lsr #29
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%or1 = or i32 %y, %x		%or1 = or i32 %y, %x
%sh1 = shl i32 %x, 3		%sh1 = shl i32 %x, 3
%sh2 = lshr i32 %or1, 29		%sh2 = lshr i32 %or1, 29
%r = or i32 %sh1, %sh2		%r = or i32 %sh1, %sh2
ret i32 %r		ret i32 %r
}		}

▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/test/CodeGen/ARM/consthoist-icmpimm.ll

	Show First 20 Lines • Show All 624 Lines • ▼ Show 20 Lines
	; CHECKV7M-NEXT: .save {r7, lr}			; CHECKV7M-NEXT: .save {r7, lr}
	; CHECKV7M-NEXT: push {r7, lr}			; CHECKV7M-NEXT: push {r7, lr}
	; CHECKV7M-NEXT: mov r12, r0			; CHECKV7M-NEXT: mov r12, r0
	; CHECKV7M-NEXT: ldr r0, [sp, #16]			; CHECKV7M-NEXT: ldr r0, [sp, #16]
	; CHECKV7M-NEXT: lsls r0, r0, #31			; CHECKV7M-NEXT: lsls r0, r0, #31
	; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8]			; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8]
	; CHECKV7M-NEXT: beq .LBB6_2			; CHECKV7M-NEXT: beq .LBB6_2
	; CHECKV7M-NEXT: @ %bb.1: @ %then			; CHECKV7M-NEXT: @ %bb.1: @ %then
	; CHECKV7M-NEXT: orrs r2, r3			; CHECKV7M-NEXT: orr.w r2, r3, r2, lsr #17
	; CHECKV7M-NEXT: lsrs r2, r2, #17			; CHECKV7M-NEXT: orr.w r1, r1, r12, lsr #17
	; CHECKV7M-NEXT: orr.w r2, r2, r3, lsl #15
	; CHECKV7M-NEXT: orr.w r3, r12, r1
	; CHECKV7M-NEXT: cmp r2, #0			; CHECKV7M-NEXT: cmp r2, #0
	; CHECKV7M-NEXT: mov r2, r0			; CHECKV7M-NEXT: mov r2, r0
	; CHECKV7M-NEXT: lsr.w r3, r3, #17
	; CHECKV7M-NEXT: orr.w r1, r3, r1, lsl #15
	; CHECKV7M-NEXT: it ne			; CHECKV7M-NEXT: it ne
	; CHECKV7M-NEXT: movne r2, lr			; CHECKV7M-NEXT: movne r2, lr
	; CHECKV7M-NEXT: cmp r1, #0			; CHECKV7M-NEXT: cmp r1, #0
	; CHECKV7M-NEXT: it ne			; CHECKV7M-NEXT: it ne
	; CHECKV7M-NEXT: movne r0, lr			; CHECKV7M-NEXT: movne r0, lr
	; CHECKV7M-NEXT: add r0, r2			; CHECKV7M-NEXT: add r0, r2
	; CHECKV7M-NEXT: pop {r7, pc}			; CHECKV7M-NEXT: pop {r7, pc}
	; CHECKV7M-NEXT: .LBB6_2: @ %else			; CHECKV7M-NEXT: .LBB6_2: @ %else
	; CHECKV7M-NEXT: orr.w r1, r2, r3			; CHECKV7M-NEXT: orr.w r1, r3, r2, lsr #17
	; CHECKV7M-NEXT: lsrs r1, r1, #17
	; CHECKV7M-NEXT: orr.w r1, r1, r3, lsl #15
	; CHECKV7M-NEXT: cmp r1, #0			; CHECKV7M-NEXT: cmp r1, #0
	; CHECKV7M-NEXT: it ne			; CHECKV7M-NEXT: it ne
	; CHECKV7M-NEXT: movne r0, lr			; CHECKV7M-NEXT: movne r0, lr
	; CHECKV7M-NEXT: pop {r7, pc}			; CHECKV7M-NEXT: pop {r7, pc}
	;			;
	; CHECKV7A-LABEL: icmp64_uge_m2:			; CHECKV7A-LABEL: icmp64_uge_m2:
	; CHECKV7A: @ %bb.0:			; CHECKV7A: @ %bb.0:
	; CHECKV7A-NEXT: .save {r4, lr}			; CHECKV7A-NEXT: .save {r4, lr}
	; CHECKV7A-NEXT: push {r4, lr}			; CHECKV7A-NEXT: push {r4, lr}
	; CHECKV7A-NEXT: ldr r4, [sp, #16]			; CHECKV7A-NEXT: ldr r4, [sp, #16]
	; CHECKV7A-NEXT: mov r12, r0			; CHECKV7A-NEXT: mov r12, r0
	; CHECKV7A-NEXT: ldrd lr, r0, [sp, #8]			; CHECKV7A-NEXT: ldrd lr, r0, [sp, #8]
	; CHECKV7A-NEXT: lsls r4, r4, #31			; CHECKV7A-NEXT: lsls r4, r4, #31
	; CHECKV7A-NEXT: beq .LBB6_2			; CHECKV7A-NEXT: beq .LBB6_2
	; CHECKV7A-NEXT: @ %bb.1: @ %then			; CHECKV7A-NEXT: @ %bb.1: @ %then
	; CHECKV7A-NEXT: orrs r2, r3			; CHECKV7A-NEXT: orr.w r2, r3, r2, lsr #17
	; CHECKV7A-NEXT: lsrs r2, r2, #17			; CHECKV7A-NEXT: orr.w r1, r1, r12, lsr #17
	; CHECKV7A-NEXT: orr.w r2, r2, r3, lsl #15
	; CHECKV7A-NEXT: orr.w r3, r12, r1
	; CHECKV7A-NEXT: cmp r2, #0			; CHECKV7A-NEXT: cmp r2, #0
	; CHECKV7A-NEXT: mov r2, r0			; CHECKV7A-NEXT: mov r2, r0
	; CHECKV7A-NEXT: lsr.w r3, r3, #17
	; CHECKV7A-NEXT: orr.w r1, r3, r1, lsl #15
	; CHECKV7A-NEXT: it ne			; CHECKV7A-NEXT: it ne
	; CHECKV7A-NEXT: movne r2, lr			; CHECKV7A-NEXT: movne r2, lr
	; CHECKV7A-NEXT: cmp r1, #0			; CHECKV7A-NEXT: cmp r1, #0
	; CHECKV7A-NEXT: it ne			; CHECKV7A-NEXT: it ne
	; CHECKV7A-NEXT: movne r0, lr			; CHECKV7A-NEXT: movne r0, lr
	; CHECKV7A-NEXT: add r0, r2			; CHECKV7A-NEXT: add r0, r2
	; CHECKV7A-NEXT: pop {r4, pc}			; CHECKV7A-NEXT: pop {r4, pc}
	; CHECKV7A-NEXT: .LBB6_2: @ %else			; CHECKV7A-NEXT: .LBB6_2: @ %else
	; CHECKV7A-NEXT: orr.w r1, r2, r3			; CHECKV7A-NEXT: orr.w r1, r3, r2, lsr #17
	; CHECKV7A-NEXT: lsrs r1, r1, #17
	; CHECKV7A-NEXT: orr.w r1, r1, r3, lsl #15
	; CHECKV7A-NEXT: cmp r1, #0			; CHECKV7A-NEXT: cmp r1, #0
	; CHECKV7A-NEXT: it ne			; CHECKV7A-NEXT: it ne
	; CHECKV7A-NEXT: movne r0, lr			; CHECKV7A-NEXT: movne r0, lr
	; CHECKV7A-NEXT: pop {r4, pc}			; CHECKV7A-NEXT: pop {r4, pc}
	br i1 %c, label %then, label %else			br i1 %c, label %then, label %else
	then:			then:
	%c1 = icmp uge i64 %x, 131072			%c1 = icmp uge i64 %x, 131072
	%c2 = icmp uge i64 %y, 131072			%c2 = icmp uge i64 %y, 131072
	▲ Show 20 Lines • Show All 130 Lines • Show Last 20 Lines

llvm/test/CodeGen/ARM/icmp-shift-opt.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7 %s -o - \| FileCheck %s		; RUN: llc -mtriple=armv7 %s -o - \| FileCheck %s

; Optimize expanded SRL/SHL used as an input of		; Optimize expanded SRL/SHL used as an input of
; SETCC comparing it with zero by removing rotation.		; SETCC comparing it with zero by removing rotation.
;		;
; See https://bugs.llvm.org/show_bug.cgi?id=50197		; See https://bugs.llvm.org/show_bug.cgi?id=50197
define i64 @opt_setcc_lt_power_of_2(i64 %a) nounwind {		define i64 @opt_setcc_lt_power_of_2(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_lt_power_of_2:		; CHECK-LABEL: opt_setcc_lt_power_of_2:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: .LBB0_1: @ %loop		; CHECK-NEXT: .LBB0_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1		; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r0, r0, #1		; CHECK-NEXT: adds r0, r0, #1
; CHECK-NEXT: adc r1, r1, #0		; CHECK-NEXT: adc r1, r1, #0
; CHECK-NEXT: orr r2, r0, r1		; CHECK-NEXT: orr r2, r1, r0, lsr #16
; CHECK-NEXT: uxth r3, r1
; CHECK-NEXT: orr r2, r3, r2, lsr #16
; CHECK-NEXT: cmp r2, #0		; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: bne .LBB0_1		; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %exit		; CHECK-NEXT: @ %bb.2: @ %exit
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
br label %loop		br label %loop

loop:		loop:
%phi.a = phi i64 [ %a, %0 ], [ %inc, %loop ]		%phi.a = phi i64 [ %a, %0 ], [ %inc, %loop ]
%inc = add i64 %phi.a, 1		%inc = add i64 %phi.a, 1
%cmp = icmp ult i64 %inc, 65536		%cmp = icmp ult i64 %inc, 65536
br i1 %cmp, label %exit, label %loop		br i1 %cmp, label %exit, label %loop

exit:		exit:
ret i64 %inc		ret i64 %inc
}		}

define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {		define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:		; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1		; CHECK-NEXT: orr r0, r1, r0, lsr #17
; CHECK-NEXT: lsr r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsl #15
; CHECK-NEXT: clz r0, r0		; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5		; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
%srl = lshr i64 %a, 17		%srl = lshr i64 %a, 17
%cmp = icmp eq i64 %srl, 0		%cmp = icmp eq i64 %srl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {		define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:		; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1		; CHECK-NEXT: orr r0, r1, r0, lsr #17
; CHECK-NEXT: lsr r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsl #15
; CHECK-NEXT: cmp r0, #0		; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1		; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
%srl = lshr i64 %a, 17		%srl = lshr i64 %a, 17
%cmp = icmp ne i64 %srl, 0		%cmp = icmp ne i64 %srl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {		define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:		; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: orr r1, r1, r0		; CHECK-NEXT: orr r0, r0, r1, lsl #17
; CHECK-NEXT: lsl r1, r1, #17
; CHECK-NEXT: orr r0, r1, r0, lsr #15
; CHECK-NEXT: clz r0, r0		; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5		; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
%shl = shl i64 %a, 17		%shl = shl i64 %a, 17
%cmp = icmp eq i64 %shl, 0		%cmp = icmp eq i64 %shl, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {		define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:		; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: orr r1, r1, r0		; CHECK-NEXT: orr r0, r0, r1, lsl #17
; CHECK-NEXT: lsl r1, r1, #17
; CHECK-NEXT: orr r0, r1, r0, lsr #15
; CHECK-NEXT: cmp r0, #0		; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1		; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
%shl = shl i64 %a, 17		%shl = shl i64 %a, 17
%cmp = icmp ne i64 %shl, 0		%cmp = icmp ne i64 %shl, 0
ret i1 %cmp		ret i1 %cmp
}		}

Show All 18 Lines	; CHECK-NEXT: pop {r4, r5, r11, pc}
ret i1 %cmp		ret i1 %cmp
}		}

; Check that optimization is applied to DAG having appropriate shape		; Check that optimization is applied to DAG having appropriate shape
; even if there were no actual shift's expansion.		; even if there were no actual shift's expansion.
define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {		define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:		; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: @ %bb.0:		; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1		; CHECK-NEXT: orr r0, r1, r0, lsl #17
; CHECK-NEXT: lsl r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsr #15
; CHECK-NEXT: clz r0, r0		; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5		; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr		; CHECK-NEXT: bx lr
%shl.a = shl i32 %a, 17		%shl.a = shl i32 %a, 17
%srl.b = lshr i32 %b, 15		%srl.b = lshr i32 %b, 15
%or.0 = or i32 %shl.a, %srl.b		%or.0 = or i32 %shl.a, %srl.b
%shl.b = shl i32 %b, 17		%shl.b = shl i32 %b, 17
%or.1 = or i32 %or.0, %shl.b		%or.1 = or i32 %or.0, %shl.b
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/rotate_vec.ll

Show First 20 Lines • Show All 224 Lines • ▼ Show 20 Lines	; AVX512-NEXT: retq
%or1 = or <4 x i32> %y, %x		%or1 = or <4 x i32> %y, %x
%sh1 = shl <4 x i32> %or1, <i32 21, i32 21, i32 21, i32 21>		%sh1 = shl <4 x i32> %or1, <i32 21, i32 21, i32 21, i32 21>
%sh2 = lshr <4 x i32> %x, <i32 11, i32 11, i32 11, i32 11>		%sh2 = lshr <4 x i32> %x, <i32 11, i32 11, i32 11, i32 11>
%r = or <4 x i32> %sh2, %sh1		%r = or <4 x i32> %sh2, %sh1
ret <4 x i32> %r		ret <4 x i32> %r
}		}

define <2 x i64> @or_fshr_v2i64(<2 x i64> %x, <2 x i64> %y) {		define <2 x i64> @or_fshr_v2i64(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: or_fshr_v2i64:		; XOP-LABEL: or_fshr_v2i64:
; CHECK: # %bb.0:		; XOP: # %bb.0:
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm1		; XOP-NEXT: vpsrlq $22, %xmm1, %xmm1
; CHECK-NEXT: vpsllq $42, %xmm0, %xmm0		; XOP-NEXT: vprotq $42, %xmm0, %xmm0
; CHECK-NEXT: vpsrlq $22, %xmm1, %xmm1		; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0		; XOP-NEXT: retq
; CHECK-NEXT: retq		;
		; AVX512-LABEL: or_fshr_v2i64:
		; AVX512: # %bb.0:
		; AVX512-NEXT: vpsrlq $22, %xmm1, %xmm1
		; AVX512-NEXT: vprolq $42, %xmm0, %xmm0
		; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
		; AVX512-NEXT: retq
%or1 = or <2 x i64> %x, %y		%or1 = or <2 x i64> %x, %y
%sh1 = shl <2 x i64> %x, <i64 42, i64 42>		%sh1 = shl <2 x i64> %x, <i64 42, i64 42>
%sh2 = lshr <2 x i64> %or1, <i64 22, i64 22>		%sh2 = lshr <2 x i64> %or1, <i64 22, i64 22>
%r = or <2 x i64> %sh1, %sh2		%r = or <2 x i64> %sh1, %sh2
ret <2 x i64> %r		ret <2 x i64> %r
}		}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)		declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)