This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Transforms/InstCombine/
-
Transforms/
-
InstCombine/
2
InstCombineSelect.cpp
-
test/Transforms/InstCombine/
-
Transforms/
-
InstCombine/
-
saturating-add-sub.ll

Differential D69244

[InstCombine] Extra combine for uadd_sat
ClosedPublic

Authored by dmgreen on Oct 21 2019, 2:18 AM.

Download Raw Diff

Details

Reviewers

nikic
lebedev.ri
spatel

Commits

rGbf21f0d489fb: [InstCombine] Extra combine for uadd_sat

Summary

This is an extra fold for a canonical form of uadd_sat, as shown in D68651. Signed patterns for these are a little more involved, but unsigned is a simple enough extension to what was already present.

Name: uadd_sat_canon             
  %3 = add i8 %0, %1
  %4 = icmp ult i8 %3, %0
  %5 = select i1 %4, i8 -1, i8 %3
=>
  %5 = uadd_sat %1, %0

Diff Detail

Event Timeline

dmgreen created this revision.Oct 21 2019, 2:18 AM

Herald added a project: Restricted Project. · View Herald TranscriptOct 21 2019, 2:18 AM

Herald added a subscriber: hiraditya. · View Herald Transcript

lebedev.ri added inline comments.Oct 21 2019, 3:36 AM

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
785–787	Maybe if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) && match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {

Update pattern

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
784	This comment needs rewording.

This revision is now accepted and ready to land.Oct 21 2019, 8:35 AM

@dmgreen if you're interested, there is at least one edge-case pattern:
https://godbolt.org/z/0dICgS

----------------------------------------
  %2 = icmp eq i32 %0, 0
  %3 = add i32 %0, -1
  %4 = select i1 %2, i32 0, i32 %3
  ret i32 %4
=>
  %4 = usub_sat i32 %0, 1
  ret i32 %4
  %3 = add i32 %0, -1
  %2 = icmp eq i32 %0, 0

Done: 1
Optimization is correct!

@lebedev.ri Might use m_UAddWithOverflow(), iirc it handles all these edge cases.

Err, make that two:
https://godbolt.org/z/F4uPIw

----------------------------------------
  %2 = icmp ugt i32 %0, 1
  %3 = add i32 %0, -2
  %4 = select i1 %2, i32 %3, i32 0
  ret i32 %4
=>
  %4 = usub_sat i32 %0, 2
  ret i32 %4
  %3 = add i32 %0, -2
  %2 = icmp ugt i32 %0, 1

Done: 1
Optimization is correct!

Thanks. I had seen that usub_sat was already handled in most cases, but hadn't seen those cases of it being constants.

It looks like canonicalizeSaturatedSubtract only handles sub's, not the add of a negative constant.

Closed by commit rGbf21f0d489fb: [InstCombine] Extra combine for uadd_sat (authored by dmgreen). · Explain WhyOct 28 2019, 8:27 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Transforms/

InstCombine/

InstCombineSelect.cpp

7 lines

test/

Transforms/

InstCombine/

saturating-add-sub.ll

12 lines

Diff 225891

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Show First 20 Lines • Show All 775 Lines • ▼ Show 20 Lines	static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
Y = Cmp1;		Y = Cmp1;
if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {		if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
// (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)		// (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
// (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)		// (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
BinaryOperator *BO = cast<BinaryOperator>(FVal);		BinaryOperator *BO = cast<BinaryOperator>(FVal);
return Builder.CreateBinaryIntrinsic(		return Builder.CreateBinaryIntrinsic(
Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));		Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
}		}
		// With an overflowing add
		lebedev.riUnsubmitted Not Done Reply Inline Actions This comment needs rewording. lebedev.ri: This comment needs rewording.
		if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
		match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
		// ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
		lebedev.riUnsubmitted Not Done Reply Inline Actions Maybe if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) && match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) { lebedev.ri: Maybe ``` if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) && match(FVal, m_c_Add…
		// ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
		return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp1, Y);
		}

return nullptr;		return nullptr;
}		}

/// Fold the following code sequence:		/// Fold the following code sequence:
/// \code		/// \code
/// int a = ctlz(x & -x);		/// int a = ctlz(x & -x);
// x ? 31 - a : a;		// x ? 31 - a : a;
▲ Show 20 Lines • Show All 1,875 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/saturating-add-sub.ll

Show First 20 Lines • Show All 1,480 Lines • ▼ Show 20 Lines	;
%a = add i32 %x, 42		%a = add i32 %x, 42
%c = icmp ult i32 %x, -43		%c = icmp ult i32 %x, -43
%r = select i1 %c, i32 %a, i32 -1		%r = select i1 %c, i32 %a, i32 -1
ret i32 %r		ret i32 %r
}		}

define i32 @uadd_sat_canon(i32 %x, i32 %y) {		define i32 @uadd_sat_canon(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_canon(		; CHECK-LABEL: @uadd_sat_canon(
; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]		; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[X]]		; CHECK-NEXT: ret i32 [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
; CHECK-NEXT: ret i32 [[R]]
;		;
%a = add i32 %x, %y		%a = add i32 %x, %y
%c = icmp ult i32 %a, %x		%c = icmp ult i32 %a, %x
%r = select i1 %c, i32 -1, i32 %a		%r = select i1 %c, i32 -1, i32 %a
ret i32 %r		ret i32 %r
}		}

define i32 @uadd_sat_canon_y(i32 %x, i32 %y) {		define i32 @uadd_sat_canon_y(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_canon_y(		; CHECK-LABEL: @uadd_sat_canon_y(
; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]		; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[Y]]		; CHECK-NEXT: ret i32 [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
; CHECK-NEXT: ret i32 [[R]]
;		;
%a = add i32 %x, %y		%a = add i32 %x, %y
%c = icmp ult i32 %a, %y		%c = icmp ult i32 %a, %y
%r = select i1 %c, i32 -1, i32 %a		%r = select i1 %c, i32 -1, i32 %a
ret i32 %r		ret i32 %r
}		}

define i32 @uadd_sat_canon_nuw(i32 %x, i32 %y) {		define i32 @uadd_sat_canon_nuw(i32 %x, i32 %y) {
▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines