Diff 385077

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Show First 20 Lines • Show All 2,298 Lines • ▼ Show 20 Lines Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) {

// Also make sure that the number of uses is as expected. The 3 is for the // Also make sure that the number of uses is as expected. The 3 is for the

// the two items of the compare and the select, or 2 from a min/max. // the two items of the compare and the select, or 2 from a min/max.

unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3; unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3;

if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses)) if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses))

return nullptr; return nullptr;

// Create the new type (which can be a vector type) // Create the new type (which can be a vector type)

Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

// Match the two extends from the add/sub

Value *A, *B;

if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B)))))

return nullptr;

// And check the incoming values are of a type smaller than or equal to the

// size of the saturation. Otherwise the higher bits can cause different

// results.

if (A->getType()->getScalarSizeInBits() > NewBitWidth ||

B->getType()->getScalarSizeInBits() > NewBitWidth)

return nullptr;

Intrinsic::ID IntrinsicID; Intrinsic::ID IntrinsicID;

if (AddSub->getOpcode() == Instruction::Add) if (AddSub->getOpcode() == Instruction::Add)

IntrinsicID = Intrinsic::sadd_sat; IntrinsicID = Intrinsic::sadd_sat;

else if (AddSub->getOpcode() == Instruction::Sub) else if (AddSub->getOpcode() == Instruction::Sub)

IntrinsicID = Intrinsic::ssub_sat; IntrinsicID = Intrinsic::ssub_sat;

else else

return nullptr; return nullptr;

// The two operands of the add/sub must be nsw-truncatable to the NewTy. This

// is usually achieved via a sext from a smaller type.

lebedev.riUnsubmitted

Done

return nullptr;

- // The two operands of the add/sub need to contain at least enough sign bits

- // to be valid in the NewBitWidth. This is usually achieved via a sext from a

+ // The two operands of the add/sub must be nsw-truncatable to the NewTy.

+ // This is usually achieved via a sext from a

// smaller type.

lebedev.ri:

if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth ||

ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)

lebedev.riUnsubmitted

Done

These aren't using InstCombine's wrapper?

lebedev.ri: These aren't using InstCombine's wrapper?

dmgreenAuthorUnsubmitted

Done

This function is a member of InstCombinerImpl, so these calls should already go through the wrappers. The other part, in processUGT_ADDCST_ADD, needs to call them via the IC that is passed to that function.

dmgreen: This function is in InstCombinerImpl, so this should go through the wrappers. The other part in…

return nullptr;

// Finally create and return the sat intrinsic, truncated to the new type // Finally create and return the sat intrinsic, truncated to the new type

lebedev.riUnsubmitted

Done

I've spent more time trying to reason about this, which means this can be improved.
Please add a ComputeMinSignedBits() wrapper next to ComputeNumSignBits(),
and use it here. Then this becomes

if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth ||
    ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)

which at least to me is immensely more readable.

lebedev.ri: I've spent more time trying to reason about this, which means this can be improved. Please add…

dmgreenAuthorUnsubmitted

Done

Sounds good. Do you mean next to the ComputeNumSignBits in InstCombiner, or next to the base variant in ValueTracking?

I added it to InstCombiner for the moment, but can easily move it if you think that is better.

dmgreen: Sounds good. Do you mean next to the ComputeNumSignBits in InstCombiner, or next to the base…

lebedev.riUnsubmitted

Done

Eh, probably both; the InstCombine one should use the ValueTracking one.

lebedev.ri: Eh, probably both; the InstCombine one should use the ValueTracking one.

dmgreenAuthorUnsubmitted

Done

OK will do.

dmgreen: OK will do.

Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);

Value *AT = Builder.CreateSExt(A, NewTy); Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);

Value *BT = Builder.CreateSExt(B, NewTy); Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);

Value *Sat = Builder.CreateCall(F, {AT, BT}); Value *Sat = Builder.CreateCall(F, {AT, BT});

lebedev.riUnsubmitted

Done

Should these not be CreateTruncOrBitCast?

lebedev.ri: Should these not be `CreateTruncOrBitCast`?

dmgreenAuthorUnsubmitted

Done

I think CreateTrunc already works the way you imagine CreateTruncOrBitCast works, as long as everything is an integer type: it starts out with `if (V->getType() == DestTy) return V;`.

dmgreen: I think CreateTrunc works how you imagine CreateTruncOrBitCast works, if everything is an…

return CastInst::Create(Instruction::SExt, Sat, Ty); return CastInst::Create(Instruction::SExt, Sat, Ty);

} }

/// Reduce a sequence of min/max with a common operand. /// Reduce a sequence of min/max with a common operand.

static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,

Value *RHS, Value *RHS,

InstCombiner::BuilderTy &Builder) { InstCombiner::BuilderTy &Builder) {

assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max"); assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max");

▲ Show 20 Lines • Show All 998 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/sadd_sat.ll

Show First 20 Lines • Show All 692 Lines • ▼ Show 20 Lines	entry:
%1 = icmp slt i64 %spec.store.select, 2147483647		%1 = icmp slt i64 %spec.store.select, 2147483647
%spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647		%spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647
ret i64 %spec.store.select8		ret i64 %spec.store.select8
}		}

define i32 @ashrA(i64 %a, i32 %b) {		define i32 @ashrA(i64 %a, i32 %b) {
; CHECK-LABEL: @ashrA(		; CHECK-LABEL: @ashrA(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 32		; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]]		; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[B:%.*]])
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)		; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648)
; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;		;
entry:		entry:
%conv = ashr i64 %a, 32		%conv = ashr i64 %a, 32
%conv1 = sext i32 %b to i64		%conv1 = sext i32 %b to i64
%add = add i64 %conv1, %conv		%add = add i64 %conv1, %conv
%spec.store.select = call i64 @llvm.smin.i64(i64 %add, i64 2147483647)		%spec.store.select = call i64 @llvm.smin.i64(i64 %add, i64 2147483647)
%spec.store.select8 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)		%spec.store.select8 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
%conv7 = trunc i64 %spec.store.select8 to i32		%conv7 = trunc i64 %spec.store.select8 to i32
ret i32 %conv7		ret i32 %conv7
}		}

define i32 @ashrB(i32 %a, i64 %b) {		define i32 @ashrB(i32 %a, i64 %b) {
; CHECK-LABEL: @ashrB(		; CHECK-LABEL: @ashrB(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[B:%.*]], 32
; CHECK-NEXT: [[CONV1:%.*]] = ashr i64 [[B:%.*]], 32		; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]		; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[A:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648		; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[SPEC_STORE_SELECT]], 2147483647
; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 2147483647
; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;		;
entry:		entry:
%conv = sext i32 %a to i64		%conv = sext i32 %a to i64
%conv1 = ashr i64 %b, 32		%conv1 = ashr i64 %b, 32
%add = add i64 %conv1, %conv		%add = add i64 %conv1, %conv
%0 = icmp sgt i64 %add, -2147483648		%0 = icmp sgt i64 %add, -2147483648
%spec.store.select = select i1 %0, i64 %add, i64 -2147483648		%spec.store.select = select i1 %0, i64 %add, i64 -2147483648
%1 = icmp slt i64 %spec.store.select, 2147483647		%1 = icmp slt i64 %spec.store.select, 2147483647
%spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647		%spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647
%conv7 = trunc i64 %spec.store.select8 to i32		%conv7 = trunc i64 %spec.store.select8 to i32
ret i32 %conv7		ret i32 %conv7
}		}

define i32 @ashrAB(i64 %a, i64 %b) {		define i32 @ashrAB(i64 %a, i64 %b) {
; CHECK-LABEL: @ashrAB(		; CHECK-LABEL: @ashrAB(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 32		; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
; CHECK-NEXT: [[CONV1:%.*]] = ashr i64 [[B:%.*]], 32		; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[B:%.*]], 32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648		; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP2]], i32 [[TMP3]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[SPEC_STORE_SELECT]], 2147483647		; CHECK-NEXT: ret i32 [[TMP4]]
; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 2147483647
; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;		;
entry:		entry:
%conv = ashr i64 %a, 32		%conv = ashr i64 %a, 32
%conv1 = ashr i64 %b, 32		%conv1 = ashr i64 %b, 32
%add = add i64 %conv1, %conv		%add = add i64 %conv1, %conv
%0 = icmp sgt i64 %add, -2147483648		%0 = icmp sgt i64 %add, -2147483648
%spec.store.select = select i1 %0, i64 %add, i64 -2147483648		%spec.store.select = select i1 %0, i64 %add, i64 -2147483648
%1 = icmp slt i64 %spec.store.select, 2147483647		%1 = icmp slt i64 %spec.store.select, 2147483647
Show All 26 Lines	entry:
%conv7 = trunc i64 %spec.store.select8 to i32		%conv7 = trunc i64 %spec.store.select8 to i32
ret i32 %conv7		ret i32 %conv7
}		}

define i32 @ashrA33(i64 %a, i32 %b) {		define i32 @ashrA33(i64 %a, i32 %b) {
; CHECK-LABEL: @ashrA33(		; CHECK-LABEL: @ashrA33(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 33		; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 33
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[CONV]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]]		; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[B:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648		; CHECK-NEXT: ret i32 [[TMP1]]
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[SPEC_STORE_SELECT]], 2147483647
; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 2147483647
; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;		;
entry:		entry:
%conv = ashr i64 %a, 33		%conv = ashr i64 %a, 33
%conv1 = sext i32 %b to i64		%conv1 = sext i32 %b to i64
%add = add i64 %conv1, %conv		%add = add i64 %conv1, %conv
%0 = icmp sgt i64 %add, -2147483648		%0 = icmp sgt i64 %add, -2147483648
%spec.store.select = select i1 %0, i64 %add, i64 -2147483648		%spec.store.select = select i1 %0, i64 %add, i64 -2147483648
%1 = icmp slt i64 %spec.store.select, 2147483647		%1 = icmp slt i64 %spec.store.select, 2147483647
Show All 25 Lines	entry:
%spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127>		%spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127>
%conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8>		%conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8>
ret <2 x i8> %conv7		ret <2 x i8> %conv7
}		}

define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) {		define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) {
; CHECK-LABEL: @ashrv2i8_s(		; CHECK-LABEL: @ashrv2i8_s(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], <i16 8, i16 8>		; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16>		; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[TMP0]] to <2 x i8>
; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i16> [[CONV]], [[CONV1]]		; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]])
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <2 x i16> [[ADD]], <i16 -128, i16 -128>		; CHECK-NEXT: ret <2 x i8> [[TMP2]]
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <2 x i1> [[TMP0]], <2 x i16> [[ADD]], <2 x i16> <i16 -128, i16 -128>
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> [[SPEC_STORE_SELECT]], <i16 127, i16 127>
; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> <i16 127, i16 127>
; CHECK-NEXT: [[CONV7:%.*]] = trunc <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8>
; CHECK-NEXT: ret <2 x i8> [[CONV7]]
;		;
entry:		entry:
%conv = ashr <2 x i16> %a, <i16 8, i16 8>		%conv = ashr <2 x i16> %a, <i16 8, i16 8>
%conv1 = sext <2 x i8> %b to <2 x i16>		%conv1 = sext <2 x i8> %b to <2 x i16>
%add = add <2 x i16> %conv1, %conv		%add = add <2 x i16> %conv1, %conv
%0 = icmp sgt <2 x i16> %add, <i16 -128, i16 -128>		%0 = icmp sgt <2 x i16> %add, <i16 -128, i16 -128>
%spec.store.select = select <2 x i1> %0, <2 x i16> %add, <2 x i16> <i16 -128, i16 -128>		%spec.store.select = select <2 x i1> %0, <2 x i16> %add, <2 x i16> <i16 -128, i16 -128>
%1 = icmp slt <2 x i16> %spec.store.select, <i16 127, i16 127>		%1 = icmp slt <2 x i16> %spec.store.select, <i16 127, i16 127>
%spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127>		%spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127>
%conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8>		%conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8>
ret <2 x i8> %conv7		ret <2 x i8> %conv7
}		}

define i16 @or(i8 %X, i16 %Y) {		define i16 @or(i8 %X, i16 %Y) {
; CHECK-LABEL: @or(		; CHECK-LABEL: @or(
; CHECK-NEXT: [[CONV10:%.*]] = sext i8 [[X:%.*]] to i16		; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8
; CHECK-NEXT: [[CONV14:%.*]] = or i16 [[Y:%.*]], -16		; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[CONV10]], [[CONV14]]		; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]])
; CHECK-NEXT: [[L9:%.*]] = icmp sgt i16 [[SUB]], -128		; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16
; CHECK-NEXT: [[L10:%.*]] = select i1 [[L9]], i16 [[SUB]], i16 -128
; CHECK-NEXT: [[L11:%.*]] = icmp slt i16 [[L10]], 127
; CHECK-NEXT: [[L12:%.*]] = select i1 [[L11]], i16 [[L10]], i16 127
; CHECK-NEXT: ret i16 [[L12]]		; CHECK-NEXT: ret i16 [[L12]]
;		;
%conv10 = sext i8 %X to i16		%conv10 = sext i8 %X to i16
%conv14 = or i16 %Y, 65520		%conv14 = or i16 %Y, 65520
%sub = sub nsw i16 %conv10, %conv14		%sub = sub nsw i16 %conv10, %conv14
%l9 = icmp sgt i16 %sub, -128		%l9 = icmp sgt i16 %sub, -128
%l10 = select i1 %l9, i16 %sub, i16 -128		%l10 = select i1 %l9, i16 %sub, i16 -128
%l11 = icmp slt i16 %l10, 127		%l11 = icmp slt i16 %l10, 127
Show All 28 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Generalize sadd.sat combine to compute sign bits.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 385077

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

llvm/test/Transforms/InstCombine/sadd_sat.ll

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Generalize sadd.sat combine to compute sign bits.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 385077

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

llvm/test/Transforms/InstCombine/sadd_sat.ll

[InstCombine] Generalize sadd.sat combine to compute sign bits.
ClosedPublic