This is an archive of the discontinued LLVM Phabricator instance.

[X86] Improve codegen for inverted overflow checking intrinsics
ClosedPublic

Authored by aemerson on Sep 21 2017, 5:19 PM.

Download Raw Diff

Details

Reviewers

craig.topper
RKSimon
spatel

Commits

rG7d6c55f8aafe: [X86] Improve codegen for inverted overflow checking intrinsics.
rL314514: [X86] Improve codegen for inverted overflow checking intrinsics.

Summary

[X86] Improve codegen for inverted overflow checking intrinsics.

Adds a new combine for: xor (setcc cc, val), 1 --> setcc (invert(cc)), val

This is the same optimization as the one for AArch64 in D38160 but implemented in a different way.

Diff Detail

Repository: rL LLVM

Event Timeline

aemerson created this revision. · Sep 21 2017, 5:19 PM

Herald added a subscriber: kristof.beyls. · View Herald Transcript · Sep 21 2017, 5:19 PM

craig.topper added reviewers: RKSimon, spatel. · Sep 24 2017, 11:44 PM

Please commit overflow-intrinsic-setcc-fold.ll (with the fixes suggested) with the current codegen, so that this patch shows the delta.

test/CodeGen/X86/overflow-intrinsic-setcc-fold.ll
1 ↗	(On Diff #116292)	Remove -mcpu=generic and regenerate the file with utils\update_llc_test_checks.py

You mean actually commit the current codegen's test output (with no code changes) and then resubmit this patch as a diff?

In D38161#882076, @aemerson wrote:

You mean actually commit the current codegen's test output (with no code changes) and then resubmit this patch as a diff?

Yes please.

aemerson mentioned this in rL314416: [X86] Add overflow intrinsic test in preparation for D38161. · Sep 28 2017, 6:45 AM

Committed test in r314416. Updated to show diff in behaviour.

Makes sense to me, although it's a pity we can't move more of this into DAGCombine and stop it being so target-specific.

This revision is now accepted and ready to land. · Sep 28 2017, 2:03 PM

Closed by commit rL314514: [X86] Improve codegen for inverted overflow checking intrinsics. (authored by aemerson). · Explain Why · Sep 29 2017, 6:55 AM

This revision was automatically updated to reflect the committed changes.

Our bots are not happy about this change: http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2355 and http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-autoconf/builds/14195 for example, please fix

In D38161#884583, @alekseyshl wrote:

Our bots are not happy about this change: http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2355 and http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-autoconf/builds/14195 for example, please fix

Are you sure it was this change?

foad mentioned this in D118461: [AMDGPU] Introduce new ISel combine for trunc-slr patterns. · Jan 28 2022, 6:22 AM

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86ISelLowering.cpp

20 lines

test/

CodeGen/

X86/

overflow-intrinsic-setcc-fold.ll

36 lines

Diff 117136

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 34,103 Lines • ▼ Show 20 Lines	if (VT.isVector() && Subtarget.hasSSE2()) {
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;		case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}		}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);		SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);		return DAG.getBitcast(VT, IntOp);
}		}
return SDValue();		return SDValue();
}		}


		/// Fold xor (X86ISD::SETCC cond, val), 1 --> X86ISD::SETCC inverted(cond), val
		///
		/// \param N   The node to inspect; anything other than an ISD::XOR is rejected.
		/// \param DAG The SelectionDAG passed through to getSETCC for the new node.
		/// \returns The replacement SETCC value on a match, or an empty SDValue
		///          when the pattern does not apply.
		static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
		// Only XOR nodes are candidates for this fold.
		if (N->getOpcode() != ISD::XOR)
		return SDValue();

		SDValue LHS = N->getOperand(0);
		// Only a constant in operand 1 is recognised; a constant in operand 0 is
		// not matched here (presumably constants are canonicalized to the RHS
		// before this runs -- TODO confirm).
		auto *RHSC = dyn_cast<ConstantSDNode>(N->getOperand(1));
		// Bail out unless the XOR is exactly "X86ISD::SETCC ^ 1".
		// NOTE(review): there is no use-count check on the SETCC operand here.
		if (!RHSC \|\| RHSC->getZExtValue() != 1 \|\| LHS->getOpcode() != X86ISD::SETCC)
		return SDValue();

		// Operand 0 of the SETCC is read as the X86 condition code and replaced by
		// its opposite.
		X86::CondCode NewCC = X86::GetOppositeBranchCondition(
		X86::CondCode(LHS->getConstantOperandVal(0)));
		SDLoc DL(N);
		// Rebuild the SETCC on the same operand 1 (presumably the flags-producing
		// value -- confirm against getSETCC) using the inverted condition.
		return getSETCC(NewCC, LHS->getOperand(1), DL, DAG);
		}

static SDValue combineXor(SDNode *N, SelectionDAG &DAG,		static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))		if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
return Cmp;		return Cmp;

if (DCI.isBeforeLegalizeOps())		if (DCI.isBeforeLegalizeOps())
return SDValue();		return SDValue();

		if (SDValue SetCC = foldXor1SetCC(N, DAG))
		return SetCC;

if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))		if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;		return RV;

if (Subtarget.hasCMov())		if (Subtarget.hasCMov())
if (SDValue RV = combineIntegerAbs(N, DAG))		if (SDValue RV = combineIntegerAbs(N, DAG))
return RV;		return RV;

if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))		if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
▲ Show 20 Lines • Show All 2,975 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/overflow-intrinsic-setcc-fold.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin -verify-machineinstrs \| FileCheck %s --check-prefix=CHECK			; RUN: llc < %s -mtriple=x86_64-apple-darwin -verify-machineinstrs \| FileCheck %s --check-prefix=CHECK

	define i1 @saddo_not_i32(i32 %v1, i32 %v2) {			define i1 @saddo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: saddo_not_i32:			; CHECK-LABEL: saddo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: addl %esi, %edi			; CHECK-NEXT: addl %esi, %edi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @saddo_not_i64(i64 %v1, i64 %v2) {			define i1 @saddo_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: saddo_not_i64:			; CHECK-LABEL: saddo_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: addq %rsi, %rdi			; CHECK-NEXT: addq %rsi, %rdi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @uaddo_not_i32(i32 %v1, i32 %v2) {			define i1 @uaddo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: uaddo_not_i32:			; CHECK-LABEL: uaddo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: addl %esi, %edi			; CHECK-NEXT: addl %esi, %edi
	; CHECK-NEXT: setb %al			; CHECK-NEXT: setae %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @uaddo_not_i64(i64 %v1, i64 %v2) {			define i1 @uaddo_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: uaddo_not_i64:			; CHECK-LABEL: uaddo_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: addq %rsi, %rdi			; CHECK-NEXT: addq %rsi, %rdi
	; CHECK-NEXT: setb %al			; CHECK-NEXT: setae %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @ssubo_not_i32(i32 %v1, i32 %v2) {			define i1 @ssubo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: ssubo_not_i32:			; CHECK-LABEL: ssubo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: cmpl %esi, %edi			; CHECK-NEXT: cmpl %esi, %edi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @ssub_not_i64(i64 %v1, i64 %v2) {			define i1 @ssub_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: ssub_not_i64:			; CHECK-LABEL: ssub_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: cmpq %rsi, %rdi			; CHECK-NEXT: cmpq %rsi, %rdi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @usubo_not_i32(i32 %v1, i32 %v2) {			define i1 @usubo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: usubo_not_i32:			; CHECK-LABEL: usubo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: cmpl %esi, %edi			; CHECK-NEXT: cmpl %esi, %edi
	; CHECK-NEXT: setb %al			; CHECK-NEXT: setae %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @usubo_not_i64(i64 %v1, i64 %v2) {			define i1 @usubo_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: usubo_not_i64:			; CHECK-LABEL: usubo_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: cmpq %rsi, %rdi			; CHECK-NEXT: cmpq %rsi, %rdi
	; CHECK-NEXT: setb %al			; CHECK-NEXT: setae %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @smulo_not_i32(i32 %v1, i32 %v2) {			define i1 @smulo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: smulo_not_i32:			; CHECK-LABEL: smulo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: imull %esi, %edi			; CHECK-NEXT: imull %esi, %edi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @smulo_not_i64(i64 %v1, i64 %v2) {			define i1 @smulo_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: smulo_not_i64:			; CHECK-LABEL: smulo_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: imulq %rsi, %rdi			; CHECK-NEXT: imulq %rsi, %rdi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @umulo_not_i32(i32 %v1, i32 %v2) {			define i1 @umulo_not_i32(i32 %v1, i32 %v2) {
	; CHECK-LABEL: umulo_not_i32:			; CHECK-LABEL: umulo_not_i32:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: movl %edi, %eax			; CHECK-NEXT: movl %edi, %eax
	; CHECK-NEXT: mull %esi			; CHECK-NEXT: mull %esi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)			%t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
	%obit = extractvalue {i32, i1} %t, 1			%obit = extractvalue {i32, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	define i1 @umulo_not_i64(i64 %v1, i64 %v2) {			define i1 @umulo_not_i64(i64 %v1, i64 %v2) {
	; CHECK-LABEL: umulo_not_i64:			; CHECK-LABEL: umulo_not_i64:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: movq %rdi, %rax			; CHECK-NEXT: movq %rdi, %rax
	; CHECK-NEXT: mulq %rsi			; CHECK-NEXT: mulq %rsi
	; CHECK-NEXT: seto %al			; CHECK-NEXT: setno %al
	; CHECK-NEXT: xorb $1, %al
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)			%t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
	%obit = extractvalue {i64, i1} %t, 1			%obit = extractvalue {i64, i1} %t, 1
	%ret = xor i1 %obit, true			%ret = xor i1 %obit, true
	ret i1 %ret			ret i1 %ret
	}			}

	Show All 13 Lines