Diff 402827

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,958 Lines • ▼ Show 20 Lines	if (N1C && !N1C->isOpaque())
if (SDValue NewSRA = visitShiftByConstant(N))		if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;		return NewSRA;

// Try to transform this shift into a multiply-high if		// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.		// it matches the appropriate pattern detected in combineShiftToMULH.
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))		if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;		return MULH;

		// Attempt to convert a sra of a load into a narrower sign-extending load.
		if (SDValue NarrowLoad = reduceLoadWidth(N))
		return NarrowLoad;

return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::visitSRL(SDNode *N) {		SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))		if (SDValue V = DAG.simplifyShift(N0, N1))
return V;		return V;
▲ Show 20 Lines • Show All 3,171 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// to indicate that the narrowed load should be left-shifted ShAmt bits to get		// to indicate that the narrowed load should be left-shifted ShAmt bits to get
// the result.		// the result.
bool HasShiftedOffset = false;		bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then		// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.		// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {		if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;		ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();		ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {		} else if (Opc == ISD::SRL \|\| Opc == ISD::SRA) {
// Another special-case: SRL is basically zero-extending a narrower value,		// Another special-case: SRL/SRA is basically zero/sign-extending a narrower
// or it may be shifting a higher subword, half or byte into the lowest		// value, or it may be shifting a higher subword, half or byte into the
// bits.		// lowest bits.

// Only handle shift with constant shift amount, and the shiftee must be a		// Only handle shift with constant shift amount, and the shiftee must be a
// load.		// load.
auto *LN = dyn_cast<LoadSDNode>(N0);		auto *LN = dyn_cast<LoadSDNode>(N0);
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));		auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!N1C \|\| !LN)		if (!N1C \|\| !LN)
return SDValue();		return SDValue();
// If the shift amount is larger than the memory type then we're not		// If the shift amount is larger than the memory type then we're not
// accessing any of the loaded bytes.		// accessing any of the loaded bytes.
ShAmt = N1C->getZExtValue();		ShAmt = N1C->getZExtValue();
uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();		uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
if (MemoryWidth <= ShAmt)		if (MemoryWidth <= ShAmt)
		spatelUnsubmitted Not Done Reply Inline Actions The various shift amount variables are difficult to follow. This is really a question for the existing code - can we make it clearer (either through renaming or code comments) how "ShAmt", "ShiftAmt" and "ShLeftAmt" are different? If we can improve that, is it possible to make a small helper/lambda, so we can share the bailout conditions for SRA/SRL instead of duplicating code? spatel: The various shift amount variables are difficult to follow. This is really a question for the…
		bjopeAuthorUnsubmitted Done Reply Inline Actions Yes. I thought the old names were kind of confusing. So I added the SRA separately. I'll make a separate patch, trying to improve the existing code as well. One reason I did not share things with SRL (I started off trying to do it that way) is that the solution for SRL seemed to be divided into two parts. I believe the second part (that includes the hasOneUse guard) can also be triggered when the SRL is nested inside an AND (N pointing at the AND and N0 being the SRL). Although, we can probably do some code sharing for the first part regardless of that, at least now that I understand that the unexplained "N0 = SDValue(N, 0)" should be done only for SRL and not for SRA. bjope: Yes. I thought the old names were kind of confusing. So I added the SRA separately. I'll make a…
return SDValue();		return SDValue();
// Attempt to fold away the SRL by using ZEXTLOAD.		// Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
ExtType = ISD::ZEXTLOAD;		ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);		ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
// If original load is a SEXTLOAD then we can't simply replace it by a		// If original load is a SEXTLOAD then we can't simply replace it by a
// ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD		// ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
// followed by a ZEXT, but that is not handled at the moment).		// followed by a ZEXT, but that is not handled at the moment). Similarly if
if (LN->getExtensionType() == ISD::SEXTLOAD)		// the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
		if ((LN->getExtensionType() == ISD::SEXTLOAD \|\|
		LN->getExtensionType() == ISD::ZEXTLOAD) &&
		LN->getExtensionType() != ExtType)
return SDValue();		return SDValue();
} else if (Opc == ISD::AND) {		} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.		// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));		auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!AndC)		if (!AndC)
return SDValue();		return SDValue();

const APInt &Mask = AndC->getAPIntValue();		const APInt &Mask = AndC->getAPIntValue();
▲ Show 20 Lines • Show All 11,914 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/pr13891.ll

	; RUN: llc -verify-machineinstrs < %s \| FileCheck %s			; RUN: llc -verify-machineinstrs < %s \| FileCheck %s
	target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"			target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
	target triple = "powerpc64-unknown-linux-gnu"			target triple = "powerpc64-unknown-linux-gnu"

	%struct.foo = type { i8, i8 }			%struct.foo = type { i8, i8 }

	define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {			define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {
	; CHECK-LABEL: _Z5check3foos:			; CHECK-LABEL: _Z5check3foos:
	; CHECK: sth 3, {{[0-9]+}}(1)			; CHECK: sth 3, {{[0-9]+}}(1)
	; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)			; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1)
	entry:			entry:
	%0 = bitcast %struct.foo* %f to i16*			%0 = bitcast %struct.foo* %f to i16*
	%1 = load i16, i16* %0, align 2			%1 = load i16, i16* %0, align 2
	%bf.val.sext = ashr i16 %1, 8			%bf.val.sext = ashr i16 %1, 8
	%cmp = icmp eq i16 %bf.val.sext, %i			%cmp = icmp eq i16 %bf.val.sext, %i
	br i1 %cmp, label %if.end, label %if.then			br i1 %cmp, label %if.end, label %if.then

	if.then: ; preds = %entry			if.then: ; preds = %entry
	Show All 9 Lines

llvm/test/CodeGen/X86/combine-sra-load.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s --check-prefix=CHECK			; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s --check-prefix=CHECK

	; FIXME: fold (sra (load i32), 16)) -> (sextload i16)			; fold (sra (load i32), 16)) -> (sextload i16)
	define i32 @sra_half(i32* %p) {			define i32 @sra_half(i32* %p) {
	; CHECK-LABEL: sra_half:			; CHECK-LABEL: sra_half:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movl (%rdi), %eax			; CHECK-NEXT: movswl 2(%rdi), %eax
	; CHECK-NEXT: sarl $16, %eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load i32, i32* %p			%load = load i32, i32* %p
	%shift = ashr i32 %load, 16			%shift = ashr i32 %load, 16
	ret i32 %shift			ret i32 %shift
	}			}

	; Vector version not folded.			; Vector version not folded.
	define <4 x i32> @sra_half_vec(<4 x i32>* %p) {			define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
	; CHECK-LABEL: sra_half_vec:			; CHECK-LABEL: sra_half_vec:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movdqa (%rdi), %xmm0			; CHECK-NEXT: movdqa (%rdi), %xmm0
	; CHECK-NEXT: psrad $16, %xmm0			; CHECK-NEXT: psrad $16, %xmm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load <4 x i32>, <4 x i32>* %p			%load = load <4 x i32>, <4 x i32>* %p
	%shift = ashr <4 x i32> %load, <i32 16, i32 16, i32 16, i32 16>			%shift = ashr <4 x i32> %load, <i32 16, i32 16, i32 16, i32 16>
	ret <4 x i32> %shift			ret <4 x i32> %shift
	}			}

	; FIXME: fold (sra (load i64), 48)) -> (sextload i16)			; fold (sra (load i64), 48)) -> (sextload i16)
	define i64 @sra_large_shift(i64* %r) {			define i64 @sra_large_shift(i64* %r) {
	; CHECK-LABEL: sra_large_shift:			; CHECK-LABEL: sra_large_shift:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movq (%rdi), %rax			; CHECK-NEXT: movswq 6(%rdi), %rax
	; CHECK-NEXT: sarq $48, %rax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%t0 = load i64, i64* %r			%t0 = load i64, i64* %r
	%conv = ashr i64 %t0, 48			%conv = ashr i64 %t0, 48
	ret i64 %conv			ret i64 %conv
	}			}

	; Negative test, no fold expected.			; Negative test, no fold expected.
	define i32 @sra_small_shift(i32* %p) {			define i32 @sra_small_shift(i32* %p) {
	Show All 14 Lines
	; CHECK-NEXT: movzbl 1(%rdi), %eax			; CHECK-NEXT: movzbl 1(%rdi), %eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load i16, i16* %p			%load = load i16, i16* %p
	%zext = zext i16 %load to i32			%zext = zext i16 %load to i32
	%shift = ashr i32 %zext, 8			%shift = ashr i32 %zext, 8
	ret i32 %shift			ret i32 %shift
	}			}

	; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)			; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
	define i32 @sra_of_sextload(i16* %p) {			define i32 @sra_of_sextload(i16* %p) {
	; CHECK-LABEL: sra_of_sextload:			; CHECK-LABEL: sra_of_sextload:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movswl (%rdi), %eax			; CHECK-NEXT: movsbl 1(%rdi), %eax
	; CHECK-NEXT: sarl $8, %eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load i16, i16* %p			%load = load i16, i16* %p
	%sext = sext i16 %load to i32			%sext = sext i16 %load to i32
	%shift = ashr i32 %sext, 8			%shift = ashr i32 %sext, 8
	ret i32 %shift			ret i32 %shift
	}			}

	; Negative test. If the shift amount is larger than the memory type then			; Negative test. If the shift amount is larger than the memory type then
	; we're not accessing any of the loaded bytes (only the extended bits). So the			; we're not accessing any of the loaded bytes (only the extended bits). So the
	; shift is expected to remain.			; shift is expected to remain.
				spatelUnsubmitted Not Done Reply Inline Actions This comment is not accurate. We are replicating (splatting) the sign bit of the loaded i16 across 32-bits, so there's still a shift. In IR, instcombine would transform this into: define i32 @sra_of_sextload_no_fold(i16* %p) { %load = load i16, i16* %p, align 2 %1 = ashr i16 %load, 15 %shift = sext i16 %1 to i32 ret i32 %shift } spatel: This comment is not accurate. We are replicating (splatting) the sign bit of the loaded i16…
				bjopeAuthorUnsubmitted Done Reply Inline Actions Right, the comment is supposed to say "so we can't fold away the shift". bjope: Right, the comment is supposed to say "so we can't fold away the shift".
	define i32 @sra_of_sextload_no_fold(i16* %p) {			define i32 @sra_of_sextload_no_fold(i16* %p) {
	; CHECK-LABEL: sra_of_sextload_no_fold:			; CHECK-LABEL: sra_of_sextload_no_fold:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movswl (%rdi), %eax			; CHECK-NEXT: movswl (%rdi), %eax
	; CHECK-NEXT: sarl $16, %eax			; CHECK-NEXT: sarl $16, %eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load i16, i16* %p			%load = load i16, i16* %p
	%sext = sext i16 %load to i32			%sext = sext i16 %load to i32
	%shift = ashr i32 %sext, 16			%shift = ashr i32 %sext, 16
	ret i32 %shift			ret i32 %shift
	}			}

	; FIXME: Fold even if SRA has multiple uses.			; Fold even if SRA has multiple uses.
	define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {			define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
	; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:			; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movl (%rdi), %ecx			; CHECK-NEXT: movswl 2(%rdi), %ecx
	; CHECK-NEXT: sarl $16, %ecx
	; CHECK-NEXT: movl %ecx, %eax			; CHECK-NEXT: movl %ecx, %eax
	; CHECK-NEXT: xorl $6, %eax			; CHECK-NEXT: xorl $6, %eax
	; CHECK-NEXT: orl %ecx, %eax			; CHECK-NEXT: orl %ecx, %eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%load = load i32, i32* %p			%load = load i32, i32* %p
	%shift = ashr i32 %load, 16			%shift = ashr i32 %load, 16
	%use1 = xor i32 %shift, 6			%use1 = xor i32 %shift, 6
	%use2 = or i32 %shift, %use1			%use2 = or i32 %shift, %use1
	ret i32 %use2			ret i32 %use2
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Fold SRA of a load into a narrower sign-extending load
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 402827

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/PowerPC/pr13891.ll

llvm/test/CodeGen/X86/combine-sra-load.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Fold SRA of a load into a narrower sign-extending load
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 402827

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/PowerPC/pr13891.ll

llvm/test/CodeGen/X86/combine-sra-load.ll

[DAGCombine] Fold SRA of a load into a narrower sign-extending load
ClosedPublic