Diff 414172

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 561 Lines • ▼ Show 20 Lines	private:

MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,		MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);		const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,		MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,		const SDLoc &dl, MVT VT, SDNode *Node,
SDValue &InFlag);		SDValue &InFlag);

bool tryOptimizeRem8Extend(SDNode *N);		bool tryOptimizeRem8Extend(SDNode *N);
		bool postprocessTest64RR(SDNode *N, unsigned DeadOpUses);
bool postprocessTestRR(SDNode *N);		bool postprocessTestRR(SDNode *N);
bool postprocessKortestRR(SDNode *N);		bool postprocessKortestRR(SDNode *N);
bool postprocessSubregToReg(SDNode *N);		bool postprocessSubregToReg(SDNode *N);

bool onlyUsesZeroFlag(SDValue Flags) const;		bool onlyUsesZeroFlag(SDValue Flags) const;
bool hasNoSignFlagUses(SDValue Flags) const;		bool hasNoSignFlagUses(SDValue Flags) const;
bool hasNoCarryFlagUses(SDValue Flags) const;		bool hasNoCarryFlagUses(SDValue Flags) const;
};		};
▲ Show 20 Lines • Show All 882 Lines • ▼ Show 20 Lines	bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
} else {		} else {
// Ok we can drop this extend and just use the original extend.		// Ok we can drop this extend and just use the original extend.
ReplaceUses(N, N00.getNode());		ReplaceUses(N, N00.getNode());
}		}

return true;		return true;
}		}

// Look for a TESTrr+ANDrr pattern where both operands of the test are		// Look for test with movabsq operand and rewrite to shr+test where possible.
// the same. Rewrite to remove the AND.		bool X86DAGToDAGISel::postprocessTest64RR(SDNode *N, unsigned DeadOpUses) {
		// Tries to replace a TEST64rr whose operand 1 is a MOV64ri constant with a
		// SHR64ri followed by a narrower TEST8rr/TEST16rr/TEST32rr of the shifted
		// value against itself. This only applies when the constant is one
		// contiguous run of exactly 8, 16 or 32 set bits and only the zero flag of
		// N is consumed.
		// DeadOpUses is the number of additional uses of the MOV64ri node that the
		// caller tolerates: the node must have exactly 1 + DeadOpUses uses.
		// Returns true if the uses of N were replaced, false if nothing changed.
		assert(N->getMachineOpcode() == X86::TEST64rr && "expected TEST64rr");
		SDValue Op1 = N->getOperand(1);
		// Only operand 1 of the TEST is inspected for a MOV64ri; a MOV64ri in
		// operand 0 is not handled here.
		// NOTE(review): the previous wording of this comment said "operand 0",
		// which does not match the getOperand(1) above -- confirm which is intended.
		if (!Op1->isMachineOpcode() \|\| Op1->getMachineOpcode() != X86::MOV64ri \|\|
		Op1->use_size() != 1 + DeadOpUses)
		return false;
		// Operand 0 of the MOV64ri is read as the 64-bit immediate.
		uint64_t C = Op1->getConstantOperandVal(0);
		// Require a shifted mask (one contiguous run of ones) and that the users of
		// N's result 0 only read the zero flag.
		if (!isShiftedMask_64(C) \|\| !onlyUsesZeroFlag(SDValue(N, 0)))
		return false;
		// For a shifted mask, whatever is left after removing the leading and
		// trailing zeros is the width of the run of ones.
		unsigned TrailZ = countTrailingZeros(C);
		unsigned LeadingZ = countLeadingZeros(C);
		unsigned PopCount = 64 - TrailZ - LeadingZ;
		unsigned TestOpc;
		MVT SubRegVT;
		unsigned SubRegIdx;
		// Pick the TEST opcode and sub-register matching the run width; any other
		// width is rejected.
		if (PopCount == 8) {
		TestOpc = X86::TEST8rr;
		SubRegVT = MVT::i8;
		SubRegIdx = X86::sub_8bit;
		} else if (PopCount == 16) {
		TestOpc = X86::TEST16rr;
		SubRegVT = MVT::i16;
		SubRegIdx = X86::sub_16bit;
		} else if (PopCount == 32) {
		TestOpc = X86::TEST32rr;
		SubRegVT = MVT::i32;
		SubRegIdx = X86::sub_32bit;
		} else {
		return false;
		}
		SDLoc DL(N);
		// Shift operand 0 of the TEST right by the number of trailing zeros so the
		// masked field ends up in the low PopCount bits. The extra MVT::i32 result
		// of the shift node is presumably EFLAGS (TODO confirm).
		SDValue ShiftC = CurDAG->getTargetConstant(TrailZ, DL, MVT::i8);
		MachineSDNode *Shr = CurDAG->getMachineNode(X86::SHR64ri, DL, MVT::i64,
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - MachineSDNode Shr = CurDAG->getMachineNode(X86::SHR64ri, DL, MVT::i64, - MVT::i32, N->getOperand(0), - ShiftC); - SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, SubRegVT, - SDValue(Shr, 0)); - MachineSDNode Test = CurDAG->getMachineNode(TestOpc, DL, MVT::i32, - SubReg, SubReg); + MachineSDNode Shr = CurDAG->getMachineNode( + X86::SHR64ri, DL, MVT::i64, MVT::i32, N->getOperand(0), ShiftC); + SDValue SubReg = 3 diff lines are omitted. See full path. Lint: Pre-merge checks:* clang-format: please reformat the code ``` - MachineSDNode *Shr = CurDAG->getMachineNode(X86…
		MVT::i32, N->getOperand(0),
		ShiftC);
		// Narrow the shifted value to the sub-register of the field's width, which
		// drops the bits that sat above the mask.
		SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, SubRegVT,
		SDValue(Shr, 0));
		// Test the narrowed value against itself and redirect all uses of the
		// original TEST64rr to the new node.
		MachineSDNode *Test = CurDAG->getMachineNode(TestOpc, DL, MVT::i32,
		SubReg, SubReg);
		ReplaceUses(N, Test);
		return true;
		}

bool X86DAGToDAGISel::postprocessTestRR(SDNode *N) {		bool X86DAGToDAGISel::postprocessTestRR(SDNode *N) {
unsigned Opc = N->getMachineOpcode();		unsigned Opc = N->getMachineOpcode();
assert((Opc == X86::TEST8rr \|\| Opc == X86::TEST16rr \|\| Opc == X86::TEST32rr \|\|		assert((Opc == X86::TEST8rr \|\| Opc == X86::TEST16rr \|\| Opc == X86::TEST32rr \|\|
Opc == X86::TEST64rr) &&		Opc == X86::TEST64rr) &&
"expected TEST opcode");		"expected TEST opcode");
SDValue Op0 = N->getOperand(0);		SDValue Op0 = N->getOperand(0);
if (N->getOperand(1) != Op0 \|\| !N->isOnlyUserOf(Op0.getNode()) \|\|		if (N->getOperand(1) != Op0 \|\| !N->isOnlyUserOf(Op0.getNode()) \|\|
!Op0.isMachineOpcode()) {		!Op0.isMachineOpcode()) {
return false;		return false;
}		}
SDValue And = Op0;		SDValue And = Op0;
unsigned NewOpc;		unsigned NewOpc;
switch (And.getMachineOpcode()) {		switch (And.getMachineOpcode()) {
case X86::AND8rr:		case X86::AND8rr:
case X86::AND16rr:		case X86::AND16rr:
case X86::AND32rr:		case X86::AND32rr:
case X86::AND64rr: {		case X86::AND64rr: {
MachineSDNode *Test = CurDAG->getMachineNode(		MachineSDNode *Test = CurDAG->getMachineNode(
Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));		Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));
ReplaceUses(N, Test);		ReplaceUses(N, Test);
		if (Opc == X86::TEST64rr) {
		postprocessTest64RR(Test, 1);
		}
return true;		return true;
}		}
case X86::AND8rm:		case X86::AND8rm:
NewOpc = X86::TEST8mr;		NewOpc = X86::TEST8mr;
break;		break;
case X86::AND16rm:		case X86::AND16rm:
NewOpc = X86::TEST16mr;		NewOpc = X86::TEST16mr;
break;		break;
▲ Show 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	while (Position != CurDAG->allnodes_begin()) {
case X86::MOVZX32rr8:		case X86::MOVZX32rr8:
case X86::MOVSX32rr8:		case X86::MOVSX32rr8:
case X86::MOVSX64rr8:		case X86::MOVSX64rr8:
if (tryOptimizeRem8Extend(N)) {		if (tryOptimizeRem8Extend(N)) {
MadeChange = true;		MadeChange = true;
continue;		continue;
}		}
break;		break;
		case X86::TEST64rr:
		if (postprocessTest64RR(N, 0)) {
		MadeChange = true;
		continue;
		}
		LLVM_FALLTHROUGH;
case X86::TEST8rr:		case X86::TEST8rr:
case X86::TEST16rr:		case X86::TEST16rr:
case X86::TEST32rr:		case X86::TEST32rr:
case X86::TEST64rr:
if (postprocessTestRR(N)) {		if (postprocessTestRR(N)) {
MadeChange = true;		MadeChange = true;
continue;		continue;
}		}
break;		break;
}		}
}		}

▲ Show 20 Lines • Show All 3,993 Lines • ▼ Show 20 Lines	if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
// We may have looked through a truncate so mask off any bits that		// We may have looked through a truncate so mask off any bits that
// shouldn't be part of the compare.		// shouldn't be part of the compare.
uint64_t Mask = MaskC->getZExtValue();		uint64_t Mask = MaskC->getZExtValue();
Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());		Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());

// Check if we can replace AND+IMM64 with a shift. This is possible for		// Check if we can replace AND+IMM64 with a shift. This is possible for
// masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero		// masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
// flag.		// flag.
if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&		if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
		craig.topper (inline comment, marked Done): Is this code already doing something similar? Sanjay had another patch to this code recently (D121147).
		MatzeB (author, inline reply, marked Done): Interesting. I believe this case covers only the cases where the mask covers the highest bit in the register, which allows us to get rid of the `test` completely, while my case covers masks that are "in-between" and do not cover the highest bit, so we still need the `test`. That said, let me dig deeper into whether there is code to be shared, reorganized, or merged.
onlyUsesZeroFlag(SDValue(Node, 0))) {		onlyUsesZeroFlag(SDValue(Node, 0))) {
unsigned ShiftOpcode = ISD::DELETED_NODE;		unsigned ShiftOpcode = ISD::DELETED_NODE;
unsigned ShiftAmt;		unsigned ShiftAmt;
if (isMask_64(~Mask)) {		if (isMask_64(~Mask)) {
ShiftOpcode = X86::SHR64ri;		ShiftOpcode = X86::SHR64ri;
ShiftAmt = countTrailingZeros(Mask);		ShiftAmt = countTrailingZeros(Mask);
} else if (isMask_64(Mask)) {		} else if (isMask_64(Mask)) {
ShiftOpcode = X86::SHL64ri;		ShiftOpcode = X86::SHL64ri;
ShiftAmt = countLeadingZeros(Mask);		ShiftAmt = countLeadingZeros(Mask);
}		}
if (ShiftOpcode != ISD::DELETED_NODE) {		if (ShiftOpcode != ISD::DELETED_NODE) {
SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);		SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
SDValue Shift = SDValue(		SDValue Shift = SDValue(
CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,		CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
N0.getOperand(0), ShiftC),		N0.getOperand(0), ShiftC),
0);		0);
		craig.topper (inline comment, marked Done): Are we emitting TEST+SHR rather than SHR by itself?
		MatzeB (author, inline reply, marked Done): It seems we emit a `TEST` here, but a separate transformation will identify the `TEST` as redundant and reuse the flags of the shift instruction.
MachineSDNode *Test =		MachineSDNode *Test =
CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift);		CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift);
ReplaceNode(Node, Test);		ReplaceNode(Node, Test);
		craig.topper (inline comment, marked Done): Initialize `TestOpcode` to `X86::TEST64rr` instead of `DELETED_NODE`. Then this becomes:
		```
		if (SubRegIdx != 0)
		  Shift = CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
		Test = CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
		ReplaceNode(Node, Test)
		```
return;		return;
}		}
		craig.topper (inline comment, marked Done): Same question.
		MatzeB (author, inline reply, marked Done): As above.
}		}

MVT VT;		MVT VT;
int SubRegOp;		int SubRegOp;
unsigned ROpc, MOpc;		unsigned ROpc, MOpc;

// For each of these checks we need to be careful if the sign flag is		// For each of these checks we need to be careful if the sign flag is
// being used. It is only safe to use the sign flag in two conditions,		// being used. It is only safe to use the sign flag in two conditions,
▲ Show 20 Lines • Show All 499 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/dag-test-mov64ri.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-- \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-- \| FileCheck %s

	define i1 @f_shr_testb(i64 %a) {			define i1 @f_shr_testb(i64 %a) {
	; CHECK-LABEL: f_shr_testb:			; CHECK-LABEL: f_shr_testb:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movabsq $287104476244869120, %rax # imm = 0x3FC000000000000			; CHECK-NEXT: shrq $50, %rdi
	; CHECK-NEXT: testq %rax, %rdi			; CHECK-NEXT: testb %dil, %dil
	; CHECK-NEXT: setne %al			; CHECK-NEXT: setne %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%v0 = and i64 %a, 287104476244869120 ; 0xff << 50			%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
	%v1 = icmp ne i64 %v0, 0			%v1 = icmp ne i64 %v0, 0
	ret i1 %v1			ret i1 %v1
	}			}

	define i1 @f_shr_testw(i64 %a) {			define i1 @f_shr_testw(i64 %a) {
	; CHECK-LABEL: f_shr_testw:			; CHECK-LABEL: f_shr_testw:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movabsq $562941363486720, %rax # imm = 0x1FFFE00000000			; CHECK-NEXT: shrq $33, %rdi
	; CHECK-NEXT: testq %rax, %rdi			; CHECK-NEXT: testw %di, %di
	; CHECK-NEXT: setne %al			; CHECK-NEXT: setne %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%v0 = and i64 %a, 562941363486720 ; 0xffff << 33			%v0 = and i64 %a, 562941363486720 ; 0xffff << 33
	%v1 = icmp ne i64 %v0, 0			%v1 = icmp ne i64 %v0, 0
	ret i1 %v1			ret i1 %v1
	}			}

	define i1 @f_shr_testl(i64 %a) {			define i1 @f_shr_testl(i64 %a) {
	; CHECK-LABEL: f_shr_testl:			; CHECK-LABEL: f_shr_testl:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movabsq $549755813760, %rax # imm = 0x7FFFFFFF80			; CHECK-NEXT: shrq $7, %rdi
	; CHECK-NEXT: testq %rax, %rdi			; CHECK-NEXT: testl %edi, %edi
	; CHECK-NEXT: sete %al			; CHECK-NEXT: sete %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7			%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
	%v1 = icmp eq i64 %v0, 0			%v1 = icmp eq i64 %v0, 0
	ret i1 %v1			ret i1 %v1
	}			}

	define i1 @f_shr(i64 %a) {			define i1 @f_shr(i64 %a) {
	▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll

	Show First 20 Lines • Show All 466 Lines • ▼ Show 20 Lines
	; X86-BMI2-NEXT: popl %esi			; X86-BMI2-NEXT: popl %esi
	; X86-BMI2-NEXT: retl			; X86-BMI2-NEXT: retl
	;			;
	; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:			; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
	; X64-BMI1: # %bb.0:			; X64-BMI1: # %bb.0:
	; X64-BMI1-NEXT: movq %rsi, %rcx			; X64-BMI1-NEXT: movq %rsi, %rcx
	; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx			; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
	; X64-BMI1-NEXT: shlq %cl, %rdi			; X64-BMI1-NEXT: shlq %cl, %rdi
	; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000			; X64-BMI1-NEXT: shrq $16, %rdi
	; X64-BMI1-NEXT: testq %rax, %rdi			; X64-BMI1-NEXT: testl %edi, %edi
	; X64-BMI1-NEXT: sete %al			; X64-BMI1-NEXT: sete %al
	; X64-BMI1-NEXT: retq			; X64-BMI1-NEXT: retq
	;			;
	; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:			; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
	; X64-BMI2: # %bb.0:			; X64-BMI2: # %bb.0:
	; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax			; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
	; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000			; X64-BMI2-NEXT: shrq $16, %rax
	; X64-BMI2-NEXT: testq %rcx, %rax			; X64-BMI2-NEXT: testl %eax, %eax
	; X64-BMI2-NEXT: sete %al			; X64-BMI2-NEXT: sete %al
	; X64-BMI2-NEXT: retq			; X64-BMI2-NEXT: retq
	%t0 = lshr i64 281474976645120, %y			%t0 = lshr i64 281474976645120, %y
	%t1 = and i64 %t0, %x			%t1 = and i64 %t0, %x
	%res = icmp eq i64 %t1, 0			%res = icmp eq i64 %t1, 0
	ret i1 %res			ret i1 %res
	}			}

	▲ Show 20 Lines • Show All 416 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll

	Show First 20 Lines • Show All 429 Lines • ▼ Show 20 Lines
	; X86-BMI2-NEXT: popl %esi			; X86-BMI2-NEXT: popl %esi
	; X86-BMI2-NEXT: retl			; X86-BMI2-NEXT: retl
	;			;
	; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:			; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
	; X64-BMI1: # %bb.0:			; X64-BMI1: # %bb.0:
	; X64-BMI1-NEXT: movq %rsi, %rcx			; X64-BMI1-NEXT: movq %rsi, %rcx
	; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx			; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
	; X64-BMI1-NEXT: shrq %cl, %rdi			; X64-BMI1-NEXT: shrq %cl, %rdi
	; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000			; X64-BMI1-NEXT: shrq $16, %rdi
	; X64-BMI1-NEXT: testq %rax, %rdi			; X64-BMI1-NEXT: testl %edi, %edi
	; X64-BMI1-NEXT: sete %al			; X64-BMI1-NEXT: sete %al
	; X64-BMI1-NEXT: retq			; X64-BMI1-NEXT: retq
	;			;
	; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:			; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
	; X64-BMI2: # %bb.0:			; X64-BMI2: # %bb.0:
	; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax			; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
	; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000			; X64-BMI2-NEXT: shrq $16, %rax
	; X64-BMI2-NEXT: testq %rcx, %rax			; X64-BMI2-NEXT: testl %eax, %eax
	; X64-BMI2-NEXT: sete %al			; X64-BMI2-NEXT: sete %al
	; X64-BMI2-NEXT: retq			; X64-BMI2-NEXT: retq
	%t0 = shl i64 281474976645120, %y			%t0 = shl i64 281474976645120, %y
	%t1 = and i64 %t0, %x			%t1 = and i64 %t0, %x
	%res = icmp eq i64 %t1, 0			%res = icmp eq i64 %t1, 0
	ret i1 %res			ret i1 %res
	}			}

	▲ Show 20 Lines • Show All 398 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

X86ISelDAGToDAG: Transform TEST + MOV64ri to SHR + TEST
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 414172

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/test/CodeGen/X86/dag-test-mov64ri.ll

llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll

llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll

This is an archive of the discontinued LLVM Phabricator instance.

X86ISelDAGToDAG: Transform TEST + MOV64ri to SHR + TESTClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 414172

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/test/CodeGen/X86/dag-test-mov64ri.ll

llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll

llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll

X86ISelDAGToDAG: Transform TEST + MOV64ri to SHR + TEST
ClosedPublic