This is an archive of the discontinued LLVM Phabricator instance.

[ValueTracking] Known bits support for unsigned saturating add/sub
ClosedPublic

Authored by nikic on Feb 17 2019, 10:37 AM.

Download Raw Diff

Details

Reviewers

spatel
RKSimon

Commits

rGed3ca9272f83: [ValueTracking] Known bits support for unsigned saturating add/sub
rL355223: [ValueTracking] Known bits support for unsigned saturating add/sub

Summary

This adds known bits support for the unsigned saturating add/sub intrinsics. The procedure is basically:

Compute known bits for a normal add/sub with one extra bit to capture the carry-out.
If known overflow, use the saturation value (all ones or zero).
If known no overflow, use the add/sub result.
If overflow is unknown: for additions we can preserve the leading ones of the inputs, because either there is no carry into the leading ones and they stay set, or there is a carry, in which case the result overflows and the all-ones saturation value is used (which also has those leading ones). Additionally, any bit known to be one in the add result stays known one, because we are selecting between the add result and all ones. For subtractions the situation is the same, but with zeros instead of ones.

I'm only handling the unsigned case here as it is simpler. It's likely not worthwhile having known bits for the signed cases at all, as we just can't get a lot of information there.

Diff Detail

Event Timeline

nikic created this revision. Feb 17 2019, 10:37 AM

Herald added a project: Restricted Project. · View Herald Transcript · Feb 17 2019, 10:37 AM

Herald added subscribers: llvm-commits, jdoerfert. · View Herald Transcript

spatel added inline comments. Feb 19 2019, 6:59 AM

lib/Analysis/ValueTracking.cpp
1540–1546	This is more accurate than what we currently do in computeOverflowForUnsignedAdd(), right? Can we adjust that existing API to use this method and then call it?

nikic mentioned this in D58593: [ValueTracking] More accurate unsigned add/sub overflow detection. Feb 24 2019, 2:49 AM

Simplify implementation: Remove checks for overflow, relying on InstCombine to handle the always/never overflow cases. Focus on only the maybe overflow case here.

LGTM

This revision is now accepted and ready to land. Feb 28 2019, 3:35 PM

Closed by commit rL355223: [ValueTracking] Known bits support for unsigned saturating add/sub (authored by nikic). · Explain Why · Mar 1 2019, 12:06 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Analysis/

ValueTracking.cpp

43 lines

unittests/

Analysis/

ValueTrackingTest.cpp

118 lines

Diff 187167

lib/Analysis/ValueTracking.cpp

Show First 20 Lines • Show All 1,520 Lines • ▼ Show 20 Lines	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);		computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);

Known.Zero =		Known.Zero =
Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);		Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
Known.One =		Known.One =
Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);		Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
break;		break;
}		}
		case Intrinsic::uadd_sat:
		case Intrinsic::usub_sat: {
		computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
		computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

		unsigned LeadingOnes = std::max(Known.countMinLeadingOnes(),
		Known2.countMinLeadingOnes());
		unsigned LeadingZeros = std::max(Known.countMinLeadingZeros(),
		Known2.countMinLeadingZeros());

		// Perform the operation with one extra bit for overflow.
		bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
		KnownBits ExtLHS = Known.zext(BitWidth + 1);
		ExtLHS.Zero.setSignBit();
		KnownBits ExtRHS = Known2.zext(BitWidth + 1);
		ExtRHS.Zero.setSignBit();
		KnownBits ExtRes = KnownBits::computeForAddSub(
		IsAdd, /* NSW */ false, ExtLHS, ExtRHS);
		[Inline comment — spatel (Not Done)]: This is more accurate than what we currently do in computeOverflowForUnsignedAdd(), right? Can we adjust that existing API to use this method and then call it?

		if (ExtRes.isNonNegative()) {
		// Operation never overflows, use operation known bits.
		Known = ExtRes.trunc(BitWidth);
		} else if (ExtRes.isNegative()) {
		// Operation always overflows, saturate to all ones / zeros.
		if (IsAdd)
		Known.setAllOnes();
		else
		Known.setAllZero();
		} else {
		// Operation may overflow. For add: Known leading ones of the inputs
		// are preserved. Furthermore one bits in the add result are known,
		// as we select against an all-ones value. For sub: Same with zeros.
		Known = ExtRes.trunc(BitWidth);
		if (IsAdd) {
		Known.One.setHighBits(LeadingOnes);
		Known.Zero.clearAllBits();
		} else {
		Known.Zero.setHighBits(LeadingZeros);
		Known.One.clearAllBits();
		}
		}
		break;
		}
case Intrinsic::x86_sse42_crc32_64_64:		case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);		Known.Zero.setBitsFrom(32);
break;		break;
}		}
}		}
break;		break;
case Instruction::ExtractElement:		case Instruction::ExtractElement:
// Look through extract element. At the moment we keep this simple and skip		// Look through extract element. At the moment we keep this simple and skip
▲ Show 20 Lines • Show All 3,918 Lines • Show Last 20 Lines

unittests/Analysis/ValueTrackingTest.cpp

Show First 20 Lines • Show All 609 Lines • ▼ Show 20 Lines	parseAssembly(
" %aaa = or i16 %aa, 3840\n"		" %aaa = or i16 %aa, 3840\n"
" %bbb = or i16 %bb, 3840\n"		" %bbb = or i16 %bb, 3840\n"
" %A = call i16 @llvm.fshl.i16(i16 %aaa, i16 %bbb, i16 0)\n"		" %A = call i16 @llvm.fshl.i16(i16 %aaa, i16 %bbb, i16 0)\n"
" ret i16 %A\n"		" ret i16 %A\n"
"}\n"		"}\n"
"declare i16 @llvm.fshl.i16(i16, i16, i16)\n");		"declare i16 @llvm.fshl.i16(i16, i16, i16)\n");
expectKnownBits(/*zero*/ 15u, /*one*/ 3840u);		expectKnownBits(/*zero*/ 15u, /*one*/ 3840u);
}		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUAddSatLeadingOnes) {
		// uadd.sat(1111...1, ........)
		// = 1111....
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 241\n"
		" %A = call i8 @llvm.uadd.sat.i8(i8 %aa, i8 %b)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.uadd.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 0u, /*one*/ 240u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUAddSatAlwaysOverflows) {
		// uadd.sat(1111...., ...1....)
		// = 11111111
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 240\n"
		" %bb = or i8 %b, 16\n"
		" %A = call i8 @llvm.uadd.sat.i8(i8 %aa, i8 %bb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.uadd.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 0u, /*one*/ 255u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUAddSatNeverOverflows) {
		// uadd.sat(00...011, .0...110)
		// = .....001
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 3\n"
		" %aaa = and i8 %aa, 59\n"
		" %bb = or i8 %b, 6\n"
		" %bbb = and i8 %bb, 190\n"
		" %A = call i8 @llvm.uadd.sat.i8(i8 %aaa, i8 %bbb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.uadd.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 6u, /*one*/ 1u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUAddSatOnesPreserved) {
		// uadd.sat(00...011, .1...110)
		// = .......1
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 3\n"
		" %aaa = and i8 %aa, 59\n"
		" %bb = or i8 %b, 70\n"
		" %bbb = and i8 %bb, 254\n"
		" %A = call i8 @llvm.uadd.sat.i8(i8 %aaa, i8 %bbb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.uadd.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 0u, /*one*/ 1u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUSubSatLeadingZeros) {
		// usub.sat(0000...0, ........)
		// = 0000....
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = and i8 %a, 14\n"
		" %A = call i8 @llvm.usub.sat.i8(i8 %aa, i8 %b)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.usub.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 240u, /*one*/ 0u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUSubSatAlwaysOverflows) {
		// usub.sat(0000...., ...1....)
		// = 00000000
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = and i8 %a, 15\n"
		" %bb = or i8 %b, 16\n"
		" %A = call i8 @llvm.usub.sat.i8(i8 %aa, i8 %bb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.usub.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 255u, /*one*/ 0u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUSubSatNeverOverflows) {
		// usub.sat(11...011, .0...110)
		// = .....101
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 195\n"
		" %aaa = and i8 %aa, 251\n"
		" %bb = or i8 %b, 6\n"
		" %bbb = and i8 %bb, 190\n"
		" %A = call i8 @llvm.usub.sat.i8(i8 %aaa, i8 %bbb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.usub.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 2u, /*one*/ 5u);
		}

		TEST_F(ComputeKnownBitsTest, ComputeKnownUSubSatZerosPreserved) {
		// usub.sat(11...011, .1...110)
		// = ......0.
		parseAssembly(
		"define i8 @test(i8 %a, i8 %b) {\n"
		" %aa = or i8 %a, 195\n"
		" %aaa = and i8 %aa, 251\n"
		" %bb = or i8 %b, 70\n"
		" %bbb = and i8 %bb, 254\n"
		" %A = call i8 @llvm.usub.sat.i8(i8 %aaa, i8 %bbb)\n"
		" ret i8 %A\n"
		"}\n"
		"declare i8 @llvm.usub.sat.i8(i8, i8)\n");
		expectKnownBits(/*zero*/ 2u, /*one*/ 0u);
		}