This is an archive of the discontinued LLVM Phabricator instance.

[SDAG] try to canonicalize logical shift after bswap
ClosedPublic

Authored by spatel on Mar 29 2022, 7:00 AM.

Download Raw Diff

Details

Reviewers

Chenbing.Zheng
RKSimon
craig.topper

Commits

rGe18cc5277fd8: [SDAG] try to canonicalize logical shift after bswap

Summary

When shifting by a byte-multiple (a constant shift amount C that is a multiple of 8 and less than the bit width):
bswap (shl X, C) --> lshr (bswap X), C
bswap (lshr X, C) --> shl (bswap X), C

This is the backend version of D122010 and an alternative suggested in D120648. There's an extra check to make sure the shift amount is valid that was not in the rough draft.

I'm not sure if there is a larger motivating case for RISCV (bug report?), but the ARM diffs show a benefit from having a late version of the transform (because we do not combine the loads in IR).

Diff Detail

Event Timeline

spatel created this revision. (Mar 29 2022, 7:00 AM)

Herald added a project: Restricted Project. · View Herald Transcript (Mar 29 2022, 7:00 AM)

Herald added subscribers: luke957, StephenFan, frasercrmck and 26 others. · View Herald Transcript

spatel requested review of this revision. (Mar 29 2022, 7:00 AM)

Herald added a project: Restricted Project. · View Herald Transcript (Mar 29 2022, 7:00 AM)

Herald added subscribers: llvm-commits, • pcwang-thead, MaskRay. · View Herald Transcript

spatel mentioned this in D120648: [DAGCombine] fold (bswap(srl (bswap c), 8*x)) -> (shl c, 8*x). (Mar 29 2022, 7:02 AM)

As bswap nodes tend to be generated in the DAG via load combines, I agree we need this fold here as well as in InstCombine.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Line 9756: Add comment describing the fold

Added code comment to describe the transform.

LGTM - cheers

This revision is now accepted and ready to land. (Mar 30 2022, 2:29 AM)

Harbormaster completed remote builds in B156771: Diff 418892. (Mar 30 2022, 3:28 AM)

This revision was landed with ongoing or failed builds. (Mar 30 2022, 6:30 AM)

Closed by commit rGe18cc5277fd8: [SDAG] try to canonicalize logical shift after bswap (authored by spatel). · Explain Why

This revision was automatically updated to reflect the committed changes.

spatel added a commit: rGe18cc5277fd8: [SDAG] try to canonicalize logical shift after bswap.

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

11 lines

test/

CodeGen/

AArch64/

arm64-rev.ll

3 lines

load-combine-big-endian.ll

3 lines

load-combine.ll

3 lines

ARM/

load-combine-big-endian.ll

9 lines

load-combine.ll

9 lines

RISCV/

bswap-shift.ll

88 lines

X86/

combine-bswap.ll

54 lines

Diff 418864

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,747 Lines • ▼ Show 20 Lines	if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
Res = DAG.getNode(ISD::SHL, DL, VT, Res,		Res = DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));		DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);		Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);		Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
return DAG.getZExtOrTrunc(Res, DL, VT);		return DAG.getZExtOrTrunc(Res, DL, VT);
}		}
}		}

		if ((N0.getOpcode() == ISD::SHL \|\| N0.getOpcode() == ISD::SRL) &&
		[Inline review comment, marked Done] RKSimon: Add comment describing the fold
		N0.hasOneUse()) {
		auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
		if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
		ShAmt->getZExtValue() % 8 == 0) {
		SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
		unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
		return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
		}
		}

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {		SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

// fold (bitreverse c1) -> c2		// fold (bitreverse c1) -> c2
▲ Show 20 Lines • Show All 14,715 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/arm64-rev.ll

	Show First 20 Lines • Show All 647 Lines • ▼ Show 20 Lines

	; Reduced regression from D120192			; Reduced regression from D120192
	define void @test_bswap32_narrow(i32* %p0, i16* %p1) nounwind {			define void @test_bswap32_narrow(i32* %p0, i16* %p1) nounwind {
	; CHECK-LABEL: test_bswap32_narrow:			; CHECK-LABEL: test_bswap32_narrow:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill			; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
	; CHECK-NEXT: ldrh w8, [x0, #2]			; CHECK-NEXT: ldrh w8, [x0, #2]
	; CHECK-NEXT: mov x19, x1			; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: lsl w8, w8, #16			; CHECK-NEXT: rev16 w0, w8
	; CHECK-NEXT: rev w0, w8
	; CHECK-NEXT: bl gid_tbl_len			; CHECK-NEXT: bl gid_tbl_len
	; CHECK-NEXT: strh wzr, [x19]			; CHECK-NEXT: strh wzr, [x19]
	; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload			; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	;			;
	; GISEL-LABEL: test_bswap32_narrow:			; GISEL-LABEL: test_bswap32_narrow:
	; GISEL: // %bb.0:			; GISEL: // %bb.0:
	; GISEL-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill			; GISEL-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
	Show All 17 Lines

llvm/test/CodeGen/AArch64/load-combine-big-endian.ll

Show First 20 Lines • Show All 436 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret i32 %tmp48		ret i32 %tmp48
}		}
; i8* p; // p is 2 byte aligned		; i8* p; // p is 2 byte aligned
; (i32) p[0] \| ((i32) p[1] << 8)		; (i32) p[0] \| ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {		define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:		; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]		; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: lsl w8, w8, #16		; CHECK-NEXT: rev16 w0, w8
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*		%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0		%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2		%tmp2 = load i8, i8* %tmp1, align 2
%tmp3 = zext i8 %tmp2 to i32		%tmp3 = zext i8 %tmp2 to i32
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1		%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp5 = load i8, i8* %tmp4, align 1		%tmp5 = load i8, i8* %tmp4, align 1
%tmp6 = zext i8 %tmp5 to i32		%tmp6 = zext i8 %tmp5 to i32
▲ Show 20 Lines • Show All 137 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/load-combine.ll

Show First 20 Lines • Show All 493 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret i32 %tmp8		ret i32 %tmp8
}		}
; i8* p; // p is 2 byte aligned		; i8* p; // p is 2 byte aligned
; (i32) p[1] \| ((i32) p[0] << 8)		; (i32) p[1] \| ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {		define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:		; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]		; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: lsl w8, w8, #16		; CHECK-NEXT: rev16 w0, w8
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret

%tmp = bitcast i32* %arg to i8*		%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1		%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1		%tmp2 = load i8, i8* %tmp1, align 1
%tmp3 = zext i8 %tmp2 to i32		%tmp3 = zext i8 %tmp2 to i32
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0		%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp5 = load i8, i8* %tmp4, align 2		%tmp5 = load i8, i8* %tmp4, align 2
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

llvm/test/CodeGen/ARM/load-combine-big-endian.ll

	Show First 20 Lines • Show All 819 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ldrb r1, [r0]			; CHECK-NEXT: ldrb r1, [r0]
	; CHECK-NEXT: ldrb r0, [r0, #1]			; CHECK-NEXT: ldrb r0, [r0, #1]
	; CHECK-NEXT: orr r0, r1, r0, lsl #8			; CHECK-NEXT: orr r0, r1, r0, lsl #8
	; CHECK-NEXT: mov pc, lr			; CHECK-NEXT: mov pc, lr
	;			;
	; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:			; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
	; CHECK-ARMv6: @ %bb.0:			; CHECK-ARMv6: @ %bb.0:
	; CHECK-ARMv6-NEXT: ldrh r0, [r0]			; CHECK-ARMv6-NEXT: ldrh r0, [r0]
	; CHECK-ARMv6-NEXT: lsl r0, r0, #16			; CHECK-ARMv6-NEXT: rev16 r0, r0
	; CHECK-ARMv6-NEXT: rev r0, r0
	; CHECK-ARMv6-NEXT: bx lr			; CHECK-ARMv6-NEXT: bx lr
	;			;
	; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:			; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
	; CHECK-THUMBv6: @ %bb.0:			; CHECK-THUMBv6: @ %bb.0:
	; CHECK-THUMBv6-NEXT: ldrh r0, [r0]			; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
	; CHECK-THUMBv6-NEXT: lsls r0, r0, #16			; CHECK-THUMBv6-NEXT: rev16 r0, r0
	; CHECK-THUMBv6-NEXT: rev r0, r0
	; CHECK-THUMBv6-NEXT: bx lr			; CHECK-THUMBv6-NEXT: bx lr
	;			;
	; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:			; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
	; CHECK-THUMBv7: @ %bb.0:			; CHECK-THUMBv7: @ %bb.0:
	; CHECK-THUMBv7-NEXT: ldrh r0, [r0]			; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
	; CHECK-THUMBv7-NEXT: lsls r0, r0, #16			; CHECK-THUMBv7-NEXT: rev16 r0, r0
	; CHECK-THUMBv7-NEXT: rev r0, r0
	; CHECK-THUMBv7-NEXT: bx lr			; CHECK-THUMBv7-NEXT: bx lr

	%tmp = bitcast i32* %arg to i8*			%tmp = bitcast i32* %arg to i8*
	%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0			%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
	%tmp2 = load i8, i8* %tmp1, align 2			%tmp2 = load i8, i8* %tmp1, align 2
	%tmp3 = zext i8 %tmp2 to i32			%tmp3 = zext i8 %tmp2 to i32
	%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1			%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
	%tmp5 = load i8, i8* %tmp4, align 1			%tmp5 = load i8, i8* %tmp4, align 1
	▲ Show 20 Lines • Show All 287 Lines • Show Last 20 Lines

llvm/test/CodeGen/ARM/load-combine.ll

	Show First 20 Lines • Show All 869 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ldrb r1, [r0]			; CHECK-NEXT: ldrb r1, [r0]
	; CHECK-NEXT: ldrb r0, [r0, #1]			; CHECK-NEXT: ldrb r0, [r0, #1]
	; CHECK-NEXT: orr r0, r0, r1, lsl #8			; CHECK-NEXT: orr r0, r0, r1, lsl #8
	; CHECK-NEXT: mov pc, lr			; CHECK-NEXT: mov pc, lr
	;			;
	; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:			; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
	; CHECK-ARMv6: @ %bb.0:			; CHECK-ARMv6: @ %bb.0:
	; CHECK-ARMv6-NEXT: ldrh r0, [r0]			; CHECK-ARMv6-NEXT: ldrh r0, [r0]
	; CHECK-ARMv6-NEXT: lsl r0, r0, #16			; CHECK-ARMv6-NEXT: rev16 r0, r0
	; CHECK-ARMv6-NEXT: rev r0, r0
	; CHECK-ARMv6-NEXT: bx lr			; CHECK-ARMv6-NEXT: bx lr
	;			;
	; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:			; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
	; CHECK-THUMBv6: @ %bb.0:			; CHECK-THUMBv6: @ %bb.0:
	; CHECK-THUMBv6-NEXT: ldrh r0, [r0]			; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
	; CHECK-THUMBv6-NEXT: lsls r0, r0, #16			; CHECK-THUMBv6-NEXT: rev16 r0, r0
	; CHECK-THUMBv6-NEXT: rev r0, r0
	; CHECK-THUMBv6-NEXT: bx lr			; CHECK-THUMBv6-NEXT: bx lr
	;			;
	; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:			; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
	; CHECK-THUMBv7: @ %bb.0:			; CHECK-THUMBv7: @ %bb.0:
	; CHECK-THUMBv7-NEXT: ldrh r0, [r0]			; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
	; CHECK-THUMBv7-NEXT: lsls r0, r0, #16			; CHECK-THUMBv7-NEXT: rev16 r0, r0
	; CHECK-THUMBv7-NEXT: rev r0, r0
	; CHECK-THUMBv7-NEXT: bx lr			; CHECK-THUMBv7-NEXT: bx lr

	%tmp = bitcast i32* %arg to i8*			%tmp = bitcast i32* %arg to i8*
	%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1			%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
	%tmp2 = load i8, i8* %tmp1, align 1			%tmp2 = load i8, i8* %tmp1, align 1
	%tmp3 = zext i8 %tmp2 to i32			%tmp3 = zext i8 %tmp2 to i32
	%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0			%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
	%tmp5 = load i8, i8* %tmp4, align 2			%tmp5 = load i8, i8* %tmp4, align 2
	▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/bswap-shift.ll

Show All 35 Lines	; RV64ZB-NEXT: ret
%2 = lshr i16 %1, 7		%2 = lshr i16 %1, 7
%3 = call i16 @llvm.bswap.i16(i16 %2)		%3 = call i16 @llvm.bswap.i16(i16 %2)
ret i16 %3		ret i16 %3
}		}

define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {		define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16:		; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: andi a0, a0, 255		; RV32ZB-NEXT: slli a0, a0, 8
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16:		; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: andi a0, a0, 255		; RV64ZB-NEXT: slli a0, a0, 8
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i16 @llvm.bswap.i16(i16 %a)		%1 = call i16 @llvm.bswap.i16(i16 %a)
%2 = lshr i16 %1, 8		%2 = lshr i16 %1, 8
%3 = call i16 @llvm.bswap.i16(i16 %2)		%3 = call i16 @llvm.bswap.i16(i16 %2)
ret i16 %3		ret i16 %3
}		}

define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32:		; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: slli a0, a0, 8
; RV32ZB-NEXT: srli a0, a0, 8
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32:		; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: slliw a0, a0, 8
; RV64ZB-NEXT: srli a0, a0, 40
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = lshr i32 %1, 8		%2 = lshr i32 %1, 8
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32:		; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: slli a0, a0, 16
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32:		; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: slliw a0, a0, 16
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = lshr i32 %1, 16		%2 = lshr i32 %1, 16
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32:		; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: andi a0, a0, 255		; RV32ZB-NEXT: slli a0, a0, 24
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32:		; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: andi a0, a0, 255		; RV64ZB-NEXT: slliw a0, a0, 24
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = lshr i32 %1, 24		%2 = lshr i32 %1, 24
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind {		define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind {
; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64:		; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: slli a1, a0, 16
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: rev8 a1, a0
; RV32ZB-NEXT: li a0, 0		; RV32ZB-NEXT: li a0, 0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64:		; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: slli a0, a0, 48
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i64 @llvm.bswap.i64(i64 %a)		%1 = call i64 @llvm.bswap.i64(i64 %a)
%2 = lshr i64 %1, 48		%2 = lshr i64 %1, 48
%3 = call i64 @llvm.bswap.i64(i64 %2)		%3 = call i64 @llvm.bswap.i64(i64 %2)
ret i64 %3		ret i64 %3
}		}

define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {		define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
Show All 18 Lines	; RV64ZB-NEXT: ret
%2 = shl i16 %1, 7		%2 = shl i16 %1, 7
%3 = call i16 @llvm.bswap.i16(i16 %2)		%3 = call i16 @llvm.bswap.i16(i16 %2)
ret i16 %3		ret i16 %3
}		}

define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {		define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16:		; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: andi a0, a0, -256		; RV32ZB-NEXT: slli a0, a0, 16
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: srli a0, a0, 24
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16:		; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: andi a0, a0, -256		; RV64ZB-NEXT: slli a0, a0, 48
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: srli a0, a0, 56
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i16 @llvm.bswap.i16(i16 %a)		%1 = call i16 @llvm.bswap.i16(i16 %a)
%2 = shl i16 %1, 8		%2 = shl i16 %1, 8
%3 = call i16 @llvm.bswap.i16(i16 %2)		%3 = call i16 @llvm.bswap.i16(i16 %2)
ret i16 %3		ret i16 %3
}		}

define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32:		; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: srli a0, a0, 8
; RV32ZB-NEXT: slli a0, a0, 8
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32:		; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: srliw a0, a0, 8
; RV64ZB-NEXT: srli a0, a0, 24
; RV64ZB-NEXT: andi a0, a0, -256
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = shl i32 %1, 8		%2 = shl i32 %1, 8
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32:		; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0		; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: slli a0, a0, 16
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32:		; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: srliw a0, a0, 16
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: slli a0, a0, 16
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = shl i32 %1, 16		%2 = shl i32 %1, 16
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {		define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32:		; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: lui a1, 1044480		; RV32ZB-NEXT: srli a0, a0, 24
; RV32ZB-NEXT: and a0, a0, a1
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32:		; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: lui a1, 1044480		; RV64ZB-NEXT: srliw a0, a0, 24
; RV64ZB-NEXT: and a0, a0, a1
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 32
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i32 @llvm.bswap.i32(i32 %a)		%1 = call i32 @llvm.bswap.i32(i32 %a)
%2 = shl i32 %1, 24		%2 = shl i32 %1, 24
%3 = call i32 @llvm.bswap.i32(i32 %2)		%3 = call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3		ret i32 %3
}		}

define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind {		define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64:		; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64:
; RV32ZB: # %bb.0:		; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a1		; RV32ZB-NEXT: srli a0, a1, 16
; RV32ZB-NEXT: slli a0, a0, 16
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: li a1, 0		; RV32ZB-NEXT: li a1, 0
; RV32ZB-NEXT: ret		; RV32ZB-NEXT: ret
;		;
; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64:		; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64:
; RV64ZB: # %bb.0:		; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0		; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: slli a0, a0, 48
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: ret		; RV64ZB-NEXT: ret
%1 = call i64 @llvm.bswap.i64(i64 %a)		%1 = call i64 @llvm.bswap.i64(i64 %a)
%2 = shl i64 %1, 48		%2 = shl i64 %1, 48
%3 = call i64 @llvm.bswap.i64(i64 %2)		%3 = call i64 @llvm.bswap.i64(i64 %2)
ret i64 %3		ret i64 %3
}		}

llvm/test/CodeGen/X86/combine-bswap.ll

	Show All 33 Lines
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%b = call i32 @llvm.bswap.i32(i32 %a0)			%b = call i32 @llvm.bswap.i32(i32 %a0)
	%c = call i32 @llvm.bswap.i32(i32 %b)			%c = call i32 @llvm.bswap.i32(i32 %b)
	ret i32 %c			ret i32 %c
	}			}

	; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x)
	define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {			define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
	; X86-LABEL: test_bswap_srli_8_bswap_i16:			; X86-LABEL: test_bswap_srli_8_bswap_i16:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: rolw $8, %ax			; X86-NEXT: shll $8, %eax
	; X86-NEXT: # kill: def $ax killed $ax killed $eax			; X86-NEXT: # kill: def $ax killed $ax killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_srli_8_bswap_i16:			; X64-LABEL: test_bswap_srli_8_bswap_i16:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movzbl %dil, %eax			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: rolw $8, %ax			; X64-NEXT: shll $8, %eax
	; X64-NEXT: # kill: def $ax killed $ax killed $eax			; X64-NEXT: # kill: def $ax killed $ax killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i16 @llvm.bswap.i16(i16 %a)			%1 = call i16 @llvm.bswap.i16(i16 %a)
	%2 = lshr i16 %1, 8			%2 = lshr i16 %1, 8
	%3 = call i16 @llvm.bswap.i16(i16 %2)			%3 = call i16 @llvm.bswap.i16(i16 %2)
	ret i16 %3			ret i16 %3
	}			}

	define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {			define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
	; X86-LABEL: test_bswap_srli_8_bswap_i32:			; X86-LABEL: test_bswap_srli_8_bswap_i32:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: bswapl %eax			; X86-NEXT: shll $8, %eax
	; X86-NEXT: shrl $8, %eax
	; X86-NEXT: bswapl %eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_srli_8_bswap_i32:			; X64-LABEL: test_bswap_srli_8_bswap_i32:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: bswapl %eax			; X64-NEXT: shll $8, %eax
	; X64-NEXT: shrl $8, %eax
	; X64-NEXT: bswapl %eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i32 @llvm.bswap.i32(i32 %a)			%1 = call i32 @llvm.bswap.i32(i32 %a)
	%2 = lshr i32 %1, 8			%2 = lshr i32 %1, 8
	%3 = call i32 @llvm.bswap.i32(i32 %2)			%3 = call i32 @llvm.bswap.i32(i32 %2)
	ret i32 %3			ret i32 %3
	}			}

	define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {			define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
	; X86-LABEL: test_bswap_srli_16_bswap_i64:			; X86-LABEL: test_bswap_srli_16_bswap_i64:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movl {{[0-9]+}}(%esp), %edx			; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
	; X86-NEXT: bswapl %edx			; X86-NEXT: shll $16, %eax
	; X86-NEXT: bswapl %eax
	; X86-NEXT: shrdl $16, %eax, %edx
	; X86-NEXT: shrl $16, %eax
	; X86-NEXT: bswapl %edx
	; X86-NEXT: bswapl %eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_srli_16_bswap_i64:			; X64-LABEL: test_bswap_srli_16_bswap_i64:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movq %rdi, %rax			; X64-NEXT: movq %rdi, %rax
	; X64-NEXT: bswapq %rax			; X64-NEXT: shlq $16, %rax
	; X64-NEXT: shrq $16, %rax
	; X64-NEXT: bswapq %rax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i64 @llvm.bswap.i64(i64 %a)			%1 = call i64 @llvm.bswap.i64(i64 %a)
	%2 = lshr i64 %1, 16			%2 = lshr i64 %1, 16
	%3 = call i64 @llvm.bswap.i64(i64 %2)			%3 = call i64 @llvm.bswap.i64(i64 %2)
	ret i64 %3			ret i64 %3
	}			}

	; TODO: fold (bswap(shl (bswap c), x)) -> (srl c, x)
	define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {			define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
	; X86-LABEL: test_bswap_shli_8_bswap_i16:			; X86-LABEL: test_bswap_shli_8_bswap_i16:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: shll $8, %eax
	; X86-NEXT: rolw $8, %ax
	; X86-NEXT: # kill: def $ax killed $ax killed $eax			; X86-NEXT: # kill: def $ax killed $ax killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_shli_8_bswap_i16:			; X64-LABEL: test_bswap_shli_8_bswap_i16:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: andl $65280, %eax # imm = 0xFF00			; X64-NEXT: movzbl %ah, %eax
	; X64-NEXT: rolw $8, %ax
	; X64-NEXT: # kill: def $ax killed $ax killed $eax			; X64-NEXT: # kill: def $ax killed $ax killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i16 @llvm.bswap.i16(i16 %a)			%1 = call i16 @llvm.bswap.i16(i16 %a)
	%2 = shl i16 %1, 8			%2 = shl i16 %1, 8
	%3 = call i16 @llvm.bswap.i16(i16 %2)			%3 = call i16 @llvm.bswap.i16(i16 %2)
	ret i16 %3			ret i16 %3
	}			}

	define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {			define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
	; X86-LABEL: test_bswap_shli_8_bswap_i32:			; X86-LABEL: test_bswap_shli_8_bswap_i32:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: bswapl %eax			; X86-NEXT: shrl $8, %eax
	; X86-NEXT: shll $8, %eax
	; X86-NEXT: bswapl %eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_shli_8_bswap_i32:			; X64-LABEL: test_bswap_shli_8_bswap_i32:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: bswapl %eax			; X64-NEXT: shrl $8, %eax
	; X64-NEXT: shll $8, %eax
	; X64-NEXT: bswapl %eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i32 @llvm.bswap.i32(i32 %a)			%1 = call i32 @llvm.bswap.i32(i32 %a)
	%2 = shl i32 %1, 8			%2 = shl i32 %1, 8
	%3 = call i32 @llvm.bswap.i32(i32 %2)			%3 = call i32 @llvm.bswap.i32(i32 %2)
	ret i32 %3			ret i32 %3
	}			}

	define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {			define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
	; X86-LABEL: test_bswap_shli_16_bswap_i64:			; X86-LABEL: test_bswap_shli_16_bswap_i64:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
	; X86-NEXT: bswapl %ecx
	; X86-NEXT: bswapl %eax
	; X86-NEXT: shldl $16, %ecx, %eax
	; X86-NEXT: bswapl %eax
	; X86-NEXT: rolw $8, %cx
	; X86-NEXT: movzwl %cx, %edx
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: test_bswap_shli_16_bswap_i64:			; X64-LABEL: test_bswap_shli_16_bswap_i64:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movq %rdi, %rax			; X64-NEXT: movq %rdi, %rax
	; X64-NEXT: bswapq %rax			; X64-NEXT: shrq $16, %rax
	; X64-NEXT: shlq $16, %rax
	; X64-NEXT: bswapq %rax
	; X64-NEXT: retq			; X64-NEXT: retq
	%1 = call i64 @llvm.bswap.i64(i64 %a)			%1 = call i64 @llvm.bswap.i64(i64 %a)
	%2 = shl i64 %1, 16			%2 = shl i64 %1, 16
	%3 = call i64 @llvm.bswap.i64(i64 %2)			%3 = call i64 @llvm.bswap.i64(i64 %2)
	ret i64 %3			ret i64 %3
	}			}

	define i32 @test_demandedbits_bswap(i32 %a0) nounwind {			define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
	▲ Show 20 Lines • Show All 153 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SDAG] try to canonicalize logical shift after bswap (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 418864

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AArch64/arm64-rev.ll

llvm/test/CodeGen/AArch64/load-combine-big-endian.ll

llvm/test/CodeGen/AArch64/load-combine.ll

llvm/test/CodeGen/ARM/load-combine-big-endian.ll

llvm/test/CodeGen/ARM/load-combine.ll

llvm/test/CodeGen/RISCV/bswap-shift.ll

llvm/test/CodeGen/X86/combine-bswap.ll

[SDAG] try to canonicalize logical shift after bswap
ClosedPublic