This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
include/llvm/Support/
-
llvm/
-
Support/
28/28
LEB128.h
-
utils/TableGen/
-
TableGen/
-
FixedLenDecoderEmitter.cpp

Differential D78796

[Support] Refactor LEB128 encoding into an input iterator
AcceptedPublic

Authored by nlguillemot on Apr 23 2020, 11:26 PM.

Download Raw Diff

Details

Reviewers

bogner
qcolombet
fhahn
rtereshin
dsanders

Summary

Refactors the logic for LEB128 encoding into a std iterator style input
iterator. This allows the output of LEB128 encoding to be passed into std
algorithms. This allowed refactoring the existing LEB128 encoding
functions to become syntactical sugar for calls to std::copy, and
this patch updates some existing uses of LEB128 encoding to take
advantage of the iterator APIs to write cleaner code. Also, to help separate
the iterator syntactical sugar from the LEB128 logic, the contents of the
inner loop of LEB128 encoding were factored out into a separate function.

Diff Detail

Event Timeline

nlguillemot created this revision.Apr 23 2020, 11:26 PM

nlguillemot added a parent revision: D78795: [Support] Add raw_ostream_iterator: ostream_iterator for raw_ostream..

nlguillemot added a child revision: D78797: [Support] Refactor LEB128 decoding into an output iterator.Apr 23 2020, 11:30 PM

The end goal of this series of patches is to support encoding/decoding APInt to/from [U|S]LEB128. This patch is an initial step in that direction. It refactors the logic to avoid code duplication and makes the interface to LEB128 encoding much more generic, as shown by the existing cases becoming very short.

In a later patch, I'd like to do some template magic to support LEB128InputIterator<APInt> in a way that tries to share code with the uint64_t/int64_t implementations.

Refactored the encode functions more aggressively by factoring out the common logic for writing to an array or to a raw_ostream:

diff
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 729ee5ca745..f65dc919e39 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -168,43 +168,52 @@ public:
   ///@}
 };
 
+/// Utility function to encode a SLEB128 or ULEB128 value to a buffer. Returns
+/// the length in bytes of the encoded value.
+template <class ValueT>
+unsigned encodeLEB128(const ValueT &Value, bool IsSigned, uint8_t *p,
+                      unsigned PadTo = 0) {
+  uint8_t *orig_p = p;
+  p = std::copy(LEB128InputIterator<ValueT>(Value, IsSigned, PadTo),
+                LEB128InputIterator<ValueT>(), p);
+  return (unsigned)(p - orig_p);
+}
+
+/// Utility function to encode a SLEB128 or ULEB128 value to an output stream.
+/// Returns the length in bytes of the encoded value.
+template <class ValueT>
+inline unsigned encodeLEB128(const ValueT &Value, bool IsSigned,
+                             raw_ostream &OS, unsigned PadTo = 0) {
+  uint64_t TellBefore = OS.tell();
+  std::copy(LEB128InputIterator<ValueT>(Value, IsSigned, PadTo),
+            LEB128InputIterator<ValueT>(), raw_ostream_iterator<uint8_t>(OS));
+  return (unsigned)(OS.tell() - TellBefore);
+}
+
 /// Utility function to encode a SLEB128 value to an output stream. Returns
 /// the length in bytes of the encoded value.
 inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS,
                               unsigned PadTo = 0) {
-  uint64_t TellBefore = OS.tell();
-  std::copy(LEB128InputIterator<int64_t>(Value, /* IsSigned */ true, PadTo),
-            LEB128InputIterator<int64_t>(), raw_ostream_iterator<uint8_t>(OS));
-  return (unsigned)(OS.tell() - TellBefore);
+  return encodeLEB128(Value, /* IsSigned */ true, OS, PadTo);
 }

 /// Utility function to encode a SLEB128 value to a buffer. Returns
 /// the length in bytes of the encoded value.
 inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) {
-  uint8_t *orig_p = p;
-  p = std::copy(LEB128InputIterator<int64_t>(Value, /* IsSigned */ true, PadTo),
-                LEB128InputIterator<int64_t>(), p);
-  return (unsigned)(p - orig_p);
+  return encodeLEB128(Value, /* IsSigned */ true, p, PadTo);
 }

 /// Utility function to encode a ULEB128 value to an output stream. Returns
 /// the length in bytes of the encoded value.
 inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS,
                               unsigned PadTo = 0) {
-  uint64_t TellBefore = OS.tell();
-  std::copy(LEB128InputIterator<uint64_t>(Value, /* IsSigned */ false, PadTo),
-            LEB128InputIterator<uint64_t>(), raw_ostream_iterator<uint8_t>(OS));
-  return (unsigned)(OS.tell() - TellBefore);
+  return encodeLEB128(Value, /* IsSigned */ false, OS, PadTo);
 }

 /// Utility function to encode a ULEB128 value to a buffer. Returns
 /// the length in bytes of the encoded value.
 inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, unsigned PadTo = 0) {
-  uint8_t *orig_p = p;
-  p = std::copy(
-      LEB128InputIterator<uint64_t>(Value, /* IsSigned */ false, PadTo),
-      LEB128InputIterator<uint64_t>(), p);
-  return (unsigned)(p - orig_p);
+  return encodeLEB128(Value, /* IsSigned */ false, p, PadTo);
 }

 /// Utility function to decode a ULEB128 value.

Removed unnecessary inline

diff
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 2549ff86ccd..8c70b41d27c 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -182,8 +182,8 @@ unsigned encodeLEB128(const ValueT &Value, bool IsSigned, uint8_t *p,
 /// Utility function to encode a SLEB128 or ULEB128 value to an output stream.
 /// Returns the length in bytes of the encoded value.
 template <class ValueT>
-inline unsigned encodeLEB128(const ValueT &Value, bool IsSigned,
-                             raw_ostream &OS, unsigned PadTo = 0) {
+unsigned encodeLEB128(const ValueT &Value, bool IsSigned, raw_ostream &OS,
+                      unsigned PadTo = 0) {
   uint64_t TellBefore = OS.tell();
   std::copy(LEB128InputIterator<ValueT>(Value, IsSigned, PadTo),
             LEB128InputIterator<ValueT>(), raw_ostream_iterator<uint8_t>(OS));

nlguillemot marked an inline comment as done.Apr 24 2020, 8:59 PM

nlguillemot added inline comments.

llvm/include/llvm/Support/LEB128.h
116	Self-review: This comment is wrong, it's not always ZExt. Whether the padding is ZExt or SExt depends on IsSigned.

Updated comments about zext to include a mention of sext.

diff
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -35,7 +35,7 @@ template <class ValueT> class LEB128InputIterator {
   /// Whether there will be more output after the previously outputted byte.
   bool More;

-  /// The output will be zext-ed to this number of bytes if necessary.
+  /// The output will be sext-ed/zext-ed to this number of bytes if necessary.
   unsigned PadTo;

   /// The current number of outputted bytes.
@@ -87,8 +87,9 @@ public:
   /// If IsSigned is true, then it encodes as SLEB128. If it's false, it encodes
   /// as ULEB128.
   ///
-  /// \param PadTo ZExt the output to this number of bytes if fewer than this
-  /// number of bytes have been outputted.
+  /// \param PadTo Pads the output to this number of bytes if fewer than this
+  /// number of bytes have been outputted. If IsSigned is true, then the padding
+  /// is sign-extended. If IsSigned is false, then it's zero-extended.
   explicit LEB128InputIterator(ValueT Value, bool IsSigned, unsigned PadTo)
       : IsEnd(false), Value(std::move(Value)), IsSigned(IsSigned), PadTo(PadTo),
         Count(0) {

nlguillemot added a reviewer: rtereshin.Apr 26 2020, 11:35 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 26 2020, 11:35 AM

dblaikie added a subscriber: dblaikie.Apr 27 2020, 10:42 PM

I'm not sure this is worth a full iterator abstraction - would the uses be that much the worse for it if they had a non-iterator type they had to iterate manually & pull values from? A more simplified iterator abstraction, essentially:

ULEBifier U(Value, IsSigned, PadTo);
while (Optional<char> C = U.next())
  OS.write(*C);

Or something like that.

In D78796#2007033, @dblaikie wrote:
I'm not sure this is worth a full iterator abstraction - would the uses be that much the worse for it if they had a non-iterator type they had to iterate manually & pull values from? A more simplified iterator abstraction, essentially:
ULEBifier U(Value, IsSigned, PadTo);
while (Optional<char> C = U.next())
  OS.write(*C);
Or something like that.

I understand that the std iterator design is not the simplest. It needs a bunch of boilerplate, and the STL iterator APIs have their own quirks that might increase the mental burden for users. With these cons in mind, let me try to justify why I think this design is the right direction.

The main advantage of using the iterator interface is to reuse code in std::/llvm::. For example, in FixedLenDecoderEmitter.cpp, there's the following code:

// Encode and emit the value to filter against.
uint8_t Buffer[16];
unsigned Len = encodeULEB128(Filter.first, Buffer);
Table.insert(Table.end(), Buffer, Buffer + Len);

With the iterator style interface, this turns into a "one-liner" (ok it's really three lines with formatting but hey...) :

// Encode and emit the value to filter against.
std::copy(
  LEB128InputIterator<unsigned>(Filter.first, /* IsSigned */ false, /* PadTo */ 0),
  LEB128InputIterator<unsigned>(), std::back_inserter(Table));

At this point, we could simplify it further by making further syntactical improvements:

Make a utility function like makeULEB128InputIterator() to deduce the ValueT template argument, to avoid explicitly passing IsSigned, and to have PadTo = 0 as a default value.
Make a utility function like makeULEB128InputRange() that uses llvm::make_range() to automatically package the end iterator with the begin iterator.

If we had these further improvements, we could then use llvm::copy from STLExtras to get the following code, which is finally truly a "one-liner":

// Encode and emit the value to filter against.
copy(makeULEB128InputRange(Filter.first), std::back_inserter(Table));

This shows that we can use this API to simplify existing code, and we can also reuse existing code in std:: and llvm:: to simplify it further. The standard iterator interface is what enables this.

The advantage of being able to reuse code in std::/llvm:: is also demonstrated in this patch itself: The existing implementations were reduced to "one-liners" of std::copy that only differ by the output iterator.

Another point: If somebody wants to write a loop in the style that you showed with ULEBifier, that can also be done with this interface (though the example below could be improved by using prefix increment):

LEB128InputIterator<unsigned> U(Value, IsSigned, PadTo);
while (U != LEB128InputIterator<unsigned>())
  OS.write(*U++);

If we can express the same code with both interface designs, then I think we might as well use the more general design that allows us to build on top of the existing std::/llvm:: algorithms. I think it would be particularly unfortunate if we started with something like ULEBifier, then later we added an iterator style wrapper for it anyways, since that would create code duplication and redundant APIs.

I'm inclined to agree that the patch series as-is doesn't really warrant the iterators as the interface as no callers have been updated. However, I also don't see much that's iterator specific (ULEBifier would be roughly similar code leaving the iterator portion as trivial wrappers on the ULEBifier) and there are a few places (particularly in tablegen) that are emitting LEB's into containers where the loop to add bytes one by one is just noise and something like std::copy(to_uleb(...), std::back_inserter(Table)); would be somewhat nice. The loop could easily be hidden in something like append_uleb(Table, ...) though I don't think there's a strong argument for (or against) iterators.

What I would suggest is separating out the byte-sequence generation into a ULEBifier as David suggested but still keeping the iterator object as a thin adapter (effectively implementing that while loop) to support inter-operation with STL functions like std::copy.

llvm/include/llvm/Support/LEB128.h
24–28	Would it make sense to have a RemainingBytes that counts down to zero? Is there a reason to keep PadTo and Count separate?
25–26	We could fold this into Optional<uint8_t> CurrByte
31–32	It may be worth mentioning that this is needed for types that don't carry signedness like APInt. unsigned/int/etc. wouldn't need it
34–35	This is misleading as the iterator can produce more bytes after this becomes false. I think it's also unnecessary as each time encodeNextByte() shifts Value right by 7 it's bringing in the correct padding bits at the top. We could just keep reading from Value for the padding bytes.
36	Is testing this on each byte measurably slower? It seems unlikely but this is called a lot and I notice that the previous code didn't do it. If it does, it would be good if we can have the template instantiation pick one side or the other
147–162	I think this belongs inside encodeNextByte(). This operator should essentially be something like: assert(CurrByte.hasValue() && "operator++() called on past-the-end LEB128InputIterator"); CurrByte = encodeNextByte(); return *this; where CurrByte is Optional<uint8_t>. As noted in another comment, I think special-casing the padding bytes isn't really needed.
181–183	I see no harm in allowing two past-the-end iterators from different sequences to be equal but I wonder if it's necessary/useful. With the ULEBifier object it could give you an end iterator and then this operator would be a plain equality comparison.

In D78796#2025650, @dsanders wrote:

I'm inclined to agree that the patch series as-is doesn't really warrant the iterators as the interface as no callers have been updated. However, I also don't see much that's iterator specific (ULEBifier would be roughly similar code leaving the iterator portion as trivial wrappers on the ULEBifier) and there are a few places (particularly in tablegen) that are emitting LEB's into containers where the loop to add bytes one by one is just noise and something like std::copy(to_uleb(...), std::back_inserter(Table)); would be somewhat nice. The loop could easily be hidden in something like append_uleb(Table, ...) though I don't think there's a strong argument for (or against) iterators.

What I would suggest is separating out the byte-sequence generation into a ULEBifier as David suggested but still keeping the iterator object as a thin adapter (effectively implementing that while loop) to support inter-operation with STL functions like std::copy.

What is the core issue with the iterator interface that makes it desirable to have something like ULEBifier<T> instead?

If it's a matter of user interface design, then I would rather implement ULEBifier<T> in terms of the iterator, since the iterator is more generic.

If it's a matter of separating the iterator-specific boilerplate from the logic, then I would rather do that by refactoring the implementation to put the core logic in private member functions. That would make it more clear to see what is iterator boilerplate and what is not.

In either case, I would rather not have 2 APIs that do the same thing, for the sake of consistency in the codebase and to avoid code duplication.

In D78796#2025835, @nlguillemot wrote:

In D78796#2025650, @dsanders wrote:

I'm inclined to agree that the patch series as-is doesn't really warrant the iterators as the interface as no callers have been updated. However, I also don't see much that's iterator specific (ULEBifier would be roughly similar code leaving the iterator portion as trivial wrappers on the ULEBifier) and there are a few places (particularly in tablegen) that are emitting LEB's into containers where the loop to add bytes one by one is just noise and something like std::copy(to_uleb(...), std::back_inserter(Table)); would be somewhat nice. The loop could easily be hidden in something like append_uleb(Table, ...) though I don't think there's a strong argument for (or against) iterators.

What I would suggest is separating out the byte-sequence generation into a ULEBifier as David suggested but still keeping the iterator object as a thin adapter (effectively implementing that while loop) to support inter-operation with STL functions like std::copy.

What is the core issue with the iterator interface that makes it desirable to have something like ULEBifier<T> instead?

For me it's that iterators reference and indirect into the elements of a container. They shouldn't be the container themselves

In D78796#2026009, @dsanders wrote:

In D78796#2025835, @nlguillemot wrote:

In D78796#2025650, @dsanders wrote:

I'm inclined to agree that the patch series as-is doesn't really warrant the iterators as the interface as no callers have been updated. However, I also don't see much that's iterator specific (ULEBifier would be roughly similar code leaving the iterator portion as trivial wrappers on the ULEBifier) and there are a few places (particularly in tablegen) that are emitting LEB's into containers where the loop to add bytes one by one is just noise and something like std::copy(to_uleb(...), std::back_inserter(Table)); would be somewhat nice. The loop could easily be hidden in something like append_uleb(Table, ...) though I don't think there's a strong argument for (or against) iterators.

What I would suggest is separating out the byte-sequence generation into a ULEBifier as David suggested but still keeping the iterator object as a thin adapter (effectively implementing that while loop) to support inter-operation with STL functions like std::copy.

What is the core issue with the iterator interface that makes it desirable to have something like ULEBifier<T> instead?

For me it's that iterators reference and indirect into the elements of a container. They shouldn't be the container themselves

If we think of an integer as a container of bits then it's not that different from a normal iterator. In this case we make a copy of the input because it's convenient and makes the interface simpler to use, but we could refactor the code to avoid mutating a copy of the input if that's important.

Hey Nicolas, just adding some thoughts on this patch.

llvm/include/llvm/Support/LEB128.h
36	`IsSigned` could easily be changed to a template parameter. Could then turn this into `if constexpr (IsSigned)` (if you can use C++17), or just SFINAE it. Better yet, use the `is_signed` type trait (https://en.cppreference.com/w/cpp/types/is_signed) on `ValueT`.
106	This constructor leaves other data members uninitialized which may be fine for an end iterator, but is a code smell C.41.
119–121	Would be a good idea to initialize `More` in the initializer list just in case something changes in the future and `More` is read before being initially assigned to in `encodeNextByte`. Could also apply to `CurrByte`...
158	If `PadTo` is 0, may have unintentional underflow since it is `unsigned` (this may be your intention though, and if so it's worth a comment). Seeing it a lot in the other code. Maybe underflow is part of the plan here?

Thanks for the comments. Added some replies.

llvm/include/llvm/Support/LEB128.h
24–28	For context, the `Count` and `PadTo` variables are there to match the original implementation of the code. Maybe could use a refactoring.
34–35	"This is misleading as the iterator can produce more bytes after this becomes false." For context, I named it that way because that's how it was named in the original implementation of the code. I agree it could be better.
36	Last I heard, we have to support C++14, so no `if constexpr` fanciness allowed. :( Using `is_signed` would work for types like `uint64_t` and `int64_t`, but not `APInt`, so we somehow need to figure out how to handle that case. Seems like there's general agreement that it might as well be a template parameter though.
106	I don't think this breaks that core guideline, since the invariants are set and the object is totally usable, though there should probably be an assert for `!IsEnd` at the start of `operator++` to enforce the API requirements. On the other hand I agree it might be good to initialize the other members anyways, just to be on the safe side, and to avoid having a copy constructor that reads uninitialized memory. The alternative is to hand-write the copy constructor to avoid copying the other fields if `IsEnd` is set, which sounds ugly and less maintainable.
158	If `PadTo` is 0 then it's impossible for the check above of `if (Count < PadTo)` to pass, so we would never hit this line of code. This code should be safe at least. If there are other cases we should double-check them as well.
181–183	The same design issue also happens with existing std input iterators like `std::istream_iterator`, so I think it's acceptable even if slightly odd.

Sorry for the delay coming back to this.

If there's sufficient agreement/justification/push to have an iterator interface, I think it'd be OK/maybe better to just have that, rather than the two - in either wrapping order. I do appreciate the "range-like" object (ULEBifier or whatever it's called) to make it easier to use with range-based algorithms, etc.

Address various review feedback (see relevant comment threads).
Made IsSigned a template parameter.
Moved the core logic of LEB128 encoding into its own function to better separate it from the syntactical sugar.
Added some convenience functions for creating LEB128 input iterator ranges.
Used these new input iterator ranges to simplify some code in FixedLenDecoderEmitter.cpp.

Herald added a subscriber: dexonsmith. · View Herald TranscriptFeb 3 2021, 5:19 PM

Some tweaks and addressing some more review comments.

nlguillemot marked an inline comment as done.Feb 3 2021, 5:34 PM

nlguillemot added inline comments.

llvm/include/llvm/Support/LEB128.h
24–28	I tried implementing `RemainingBytes` instead of `PadTo`/`Count`, but the problem is that we don't actually know how many bytes are remaining because the algorithm encodes LEB128 bytes one-by-one. `PadTo` is usually equal to 0 which results in having no padding.
25–26	Implemented in the latest update: Removed the `IsEnd` member and turned `CurrByte` into an `Optional` instead, where `CurrByte == None` means the same thing as `IsEnd == true` did.
31–32	Added a mention of this in the comments.
34–35	Updated the name of `More` to make more sense and slightly simplified the logic surrounding it. Also the padding bits now come from the 7 least significant bits like you suggested.
36	`IsSigned` is now a template parameter.
106	All members are now initialized to sensible values to avoid potential undefined behavior.
119–121	All members are now initialized in all cases.
147–162	Moved the logic inside the core function and now the operator looks like what you described.
158	Avoided any potential underflow by switching the logic to `if (Count + 1 < PadTo)`
181–183	After messing with it for a while I think it's simplest to keep it this way. Mainly because it simplifies the logic under it, since the member-by-member comparison doesn't have to worry about comparing the members of end iterators, or comparing the members of an end iterator with the members of a non-end iterator.

nlguillemot marked an inline comment as done.Feb 3 2021, 5:52 PM

(leaving this for @dsanders to close on whether their feedback has been suitably addressed - feel free to loop me back in if tie breaking or re-approval is required, etc)

LGTM

FWIW, I still feel the encoder state shouldn't live inside the iterator but I am a bit happier that the iterator doesn't own the non-trivial encoder logic anymore. It's more consistent with pointing into a container rather than being the container, even if the pointer is unusually descriptive and the container is really more of a description of all the possible containers disambiguated by references to a specific element. I do wonder why do the logic and not the storage though. All that said, pragmatically I don't really want to insist on that as it's been in review for a very long time now so I think the middle ground is, LGTM as it currently is and if someone needs to use it in a generator-like style at some point they'll be the ones to put detail::encodeLEB128Byte() and the state in an object and have the iterators use that. Similarly, if we grow more places where iterators don't make sense they can either use the existing functions or do the same transformation just described.

This revision is now accepted and ready to land.Feb 8 2021, 3:47 PM

In D78796#2549891, @dsanders wrote:

I do wonder why do the logic and not the storage though.

At some point while working on this I tried putting all the members in a separate struct, but it kinda felt like duplicating code so I decided to leave it as is. Like you say, it might be an interesting refactor.

Revision Contents

Path

Size

llvm/

include/

llvm/

Support/

LEB128.h

303 lines

utils/

TableGen/

FixedLenDecoderEmitter.cpp

43 lines

Diff 321276

llvm/include/llvm/Support/LEB128.h

	Show All 9 Lines
	// ULEB128 values.			// ULEB128 values.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_SUPPORT_LEB128_H			#ifndef LLVM_SUPPORT_LEB128_H
	#define LLVM_SUPPORT_LEB128_H			#define LLVM_SUPPORT_LEB128_H

	#include "llvm/Support/raw_ostream.h"			#include "llvm/Support/raw_ostream.h"
				#include <algorithm>
				#include <iterator>

	namespace llvm {			namespace llvm {

	/// Utility function to encode a SLEB128 value to an output stream. Returns			namespace detail {
	/// the length in bytes of the encoded value.			/// Encodes one byte's worth of LEB128 data from the LSB of the given value.
	inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS,			/// This function is used in the inner loop of LEB128 encoding.
	unsigned PadTo = 0) {			///
				dsandersUnsubmitted Done Reply Inline Actions We could fold this into Optional<uint8_t> CurrByte dsanders: We could fold this into Optional<uint8_t> CurrByte
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Implement in the latest update: Removed the `IsEnd` member and turned `CurrByte` into an `Optional` instead, where `CurrByte == None` means the same thing as `IsEnd == true` did. nlguillemot: Implement in the latest update: Removed the `IsEnd` member and turned `CurrByte` into an…
	bool More;			/// \param IsSigned How to treat the signedness of the value. If IsSigned is
	unsigned Count = 0;			/// true, then it encodes as SLEB128. If it's false, it encodes as ULEB128. This
				dsandersUnsubmitted Done Reply Inline Actions Would it make sense to have a RemainingBytes that counts down to zero? Is there a reason to keep PadTo and Count separate? dsanders: Would it make sense to have a RemainingBytes that counts down to zero? Is there a reason to…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions For context, the `Count` and `PadTo` variables are there to match the original implementation of the code. Maybe could use a refactoring. nlguillemot: For context, the `Count` and `PadTo` variables are there to match the original implementation…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions I tried implementing `RemainingBytes` instead of `PadTo`/`Count`, but the problem is that we don't actually know how many bytes are remaining because the algorithm encodes LEB128 bytes one-by-one. `PadTo` is usually equal to 0 which results in having no padding. nlguillemot: I tried implementing `RemainingBytes` instead of `PadTo`/`Count`, but the problem is that we…
	do {			/// is not particularly useful for types like int64_t/uint64_t since we already
	uint8_t Byte = Value & 0x7f;			/// know the signedness from their types, but it's important to extend LEB128
				/// encoding to types like APInt which don't inherently have a signedness.
				///
				dsandersUnsubmitted Done Reply Inline Actions It may be worth mentioning that this is needed for types that don't carry signedness like APInt. unsigned/int/etc. wouldn't need it dsanders: It may be worth mentioning that this is needed for types that don't carry signedness like APInt.
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Added a mention of this in the comments. nlguillemot: Added a mention of this in the comments.
				/// \param ValueT The type of the value of the source of the LEB128 conversion.
				///
				/// \param Value The value to convert to LEB128 bytes. Shifted in-place as
				dsandersUnsubmitted Done Reply Inline Actions This is misleading as the iterator can produce more bytes after this becomes false. I think it's also unnecessary as each time encodeNextByte() shifts Value right by 7 it's bringing in the correct padding bits at the top. We could just keep reading from Value for the padding bytes. dsanders: This is misleading as the iterator can produce more bytes after this becomes false. I think…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions his is misleading as the iterator can produce more bytes after this becomes false. For context, I named it that way because that's how it was named in the original implementation of the code. I agree it could be better. nlguillemot: > his is misleading as the iterator can produce more bytes after this becomes false. For…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Updated the name of `More` to make more sense and slightly simplified the logic surrounding it. Also the padding bits now come from the 7 least significant bits like you suggested. nlguillemot: Updated the name of `More` to make more sense and slightly simplified the logic surrounding it.
				/// bits are consumed from its LSB to create LEB128-encoded bytes.
				dsandersUnsubmitted Done Reply Inline Actions Is testing this on each byte measurably slower? It seems unlikely but this is called a lot and I notice that the previous code didn't do it. If it does, it would be good if we can have the template instantiation pick one side or the other dsanders: Is testing this on each byte measurably slower? It seems unlikely but this is called a lot and…
				mkitzanUnsubmitted Done Reply Inline Actions `IsSigned` could easily be changed to a template parameter. Could then turn this is into `if constexpr (IsSigned)` (if you can use C++17), or just SFINAE it. Better yet, use [[ https://en.cppreference.com/w/cpp/types/is_signed \| `is_signed` type trait ]] on `ValueT`. mkitzan: `IsSigned` could easily be changed to a template parameter. Could then turn this is into `if…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Last I heard, we have to support C++14, so no `if constexpr` fanciness allowed. :( Using `is_signed` would work for types like `uint64_t` and `int64_t`, but not `APInt`, so we somehow need to figure out how to handle that case. Seems like there's general agreement that it might as well be a template parameter though. nlguillemot: Last I heard, we have to support C++14, so no `if constexpr` fanciness allowed. :( Using…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions `IsSigned` is now a template parameter. nlguillemot: `IsSigned` is now a template parameter.
				///
				/// \param AllValueBitsEncoded Whether all non-padding bits of the value have
				/// already been encoded. Set to true in-place once all that's left is padding.
				///
				/// \param Count The number of bytes encoded so far. Updated in-place to account
				/// for newly encoded bytes.
				///
				/// \param PadTo Pads the output to this number of bytes if fewer than this
				/// number of bytes have been outputted. If IsSigned is true, then the padding
				/// is sign-extended. If IsSigned is false, then it's zero-extended.
				///
				/// \return The next LEB128-encoded byte, or None if the encoding is complete.
				template <bool IsSigned, class ValueT>
				Optional<uint8_t> encodeLEB128Byte(ValueT &Value, bool &AllValueBitsEncoded,
				unsigned &Count, unsigned PadTo = 0) {
				// If the value is not done being encoded, then read the next bits from it and
				// encode them as LEB128.
				if (!AllValueBitsEncoded) {
				// Get the next byte from the input value.
				uint8_t CurrByte = Value & 0x7f;
				if (IsSigned) {
				int64_t SValue = static_cast<int64_t>(Value);
	// NOTE: this assumes that this signed shift is an arithmetic right shift.			// NOTE: this assumes that this signed shift is an arithmetic right shift.
	Value >>= 7;			SValue >>= 7;
	More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) \|\|			AllValueBitsEncoded =
	((Value == -1) && ((Byte & 0x40) != 0))));			// The value was positive, all set bits have been encoded, and this
				// byte sets the sign bit to 0, so encoding the value is complete.
				(SValue == 0 && (CurrByte & 0x40) == 0) \|\|
				// The value was negative, only negative sign bits are left, and this
				// byte sets the sign bit to 1, so encoding the value is complete.
				(SValue == -1 && (CurrByte & 0x40) != 0);
				Value = static_cast<ValueT>(SValue);
				} else {
				// Logical right shift.
				uint64_t UValue = static_cast<uint64_t>(Value);
				UValue >>= 7;
				AllValueBitsEncoded = UValue == 0;
				Value = static_cast<ValueT>(UValue);
				}
				if (!AllValueBitsEncoded \|\| Count + 1 < PadTo)
				CurrByte \|= 0x80; // Mark this byte to show that more bytes will follow.
	Count++;			Count++;
	if (More \|\| Count < PadTo)			return CurrByte;
	Byte \|= 0x80; // Mark this byte to show that more bytes will follow.			}
	OS << char(Byte);
	} while (More);

	// Pad with 0x80 and emit a terminating byte at the end.			// If there are no more bytes to read from the value itself, then encode a
				// padding byte next.
	if (Count < PadTo) {			if (Count < PadTo) {
	uint8_t PadValue = Value < 0 ? 0x7f : 0x00;			// Pad with 0s or 1s depending on whether we want to zext or sext.
	for (; Count < PadTo - 1; ++Count)			assert((Value == 0 \|\| static_cast<int64_t>(Value) == -1) &&
	OS << char(PadValue \| 0x80);			"Assuming the leftover Value can be used as signed padding.");
	OS << char(PadValue);			uint8_t PadByte = Value & 0x7f;
				// Add a continuation bit to say that there is more padding after this.
				if (Count + 1 < PadTo)
				PadByte \|= 0x80;
	Count++;			Count++;
				return PadByte;
	}			}
	return Count;
				// At this point, all necessary padding has been added, so there are no
				// more bytes to output.
				return None;
	}			}
				} // namespace detail

	/// Utility function to encode a SLEB128 value to a buffer. Returns			/// Input iterator interface to encode a value as LEB128 bytes.
	/// the length in bytes of the encoded value.			template <bool IsSigned, class ValueT> class LEB128InputIterator {
	inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) {			/// The value that the iterator reads from to encode bits as LEB128.
	uint8_t *orig_p = p;			ValueT Value = ValueT();

				mkitzanUnsubmitted Done Reply Inline Actions This constructor leaves other data members uninitialized which may be fine for an end iterator, but is a code smell C.41. mkitzan: This constructor leaves other data members uninitialized which may be fine for an end iterator…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions I don't think this breaks that core guideline, since the invariants are set and the object is totally usable, though there should probably be an assert for `!IsEnd` at the start of `operator++` to enforce the API requirements. On the other hand I agree it might be good to initialize the other members anyways, just to be on the safe side, and to avoid having a copy constructor that reads uninitialized memory. The alternative is to hand-write the copy constructor to avoid copying the other fields if `IsEnd` is set, which sounds ugly and less maintainable. nlguillemot: I don't think this breaks that core guideline, since the invariants are set and the object is…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions All members are now initialized to sensible values to avoid potential undefined behavior. nlguillemot: All members are now initialized to sensible values to avoid potential undefined behavior.
				/// The output will be sext-ed/zext-ed to this number of bytes if necessary.
				unsigned PadTo = 0;

				/// The number of bytes encoded so far.
	unsigned Count = 0;			unsigned Count = 0;
	bool More;
	do {
	uint8_t Byte = Value & 0x7f;
	// NOTE: this assumes that this signed shift is an arithmetic right shift.
	Value >>= 7;
	More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) \|\|
	((Value == -1) && ((Byte & 0x40) != 0))));
	Count++;
	if (More \|\| Count < PadTo)
	Byte \|= 0x80; // Mark this byte to show that more bytes will follow.
	*p++ = Byte;
	} while (More);

	// Pad with 0x80 and emit a terminating byte at the end.			/// Whether all non-padding bits from the Value have already been encoded.
	if (Count < PadTo) {			bool AllValueBitsEncoded = false;
	uint8_t PadValue = Value < 0 ? 0x7f : 0x00;
	for (; Count < PadTo - 1; ++Count)			/// The current LEB128 encoded byte that this iterator will return.
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Self-review: This comment is wrong, it's not always ZExt. Whether the padding is ZExt or SExt depends on IsSigned. nlguillemot: Self-review: This comment is wrong, it's not always ZExt. Whether the padding is ZExt or SExt…
	*p++ = (PadValue \| 0x80);			/// If this is None then this object represents an end() iterator.
	*p++ = PadValue;			Optional<uint8_t> CurrByte;

				/// Consumes 7 bits from Value and encodes them as a LEB128 byte in CurrByte.
				void getNextByte() {
				mkitzanUnsubmitted Done Reply Inline Actions Would be a good idea to initialize `More` in the initializer list just in case something changes in the future and `More` is read before being initially assigned to in `encodeNextByte`. Could also apply to `CurrByte`... mkitzan: Would be a good idea to initialize `More` in the initializer list just in case something…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions All members are now initialized in all cases. nlguillemot: All members are now initialized in all cases.
				CurrByte = detail::encodeLEB128Byte<IsSigned, ValueT>(
				Value, AllValueBitsEncoded, Count, PadTo);
				}

				public:
				/// Boilerplate typedefs for C++ iterators.
				///@{
				using iterator_category = std::input_iterator_tag;
				using value_type = uint8_t;
				using difference_type = std::ptrdiff_t;
				using pointer = const uint8_t *;
				using reference = const uint8_t &;
				///@}

				/// Constructs an end() iterator.
				LEB128InputIterator() = default;

				/// Initializes an iterator wrapper for \ref detail::encodeLEB128Byte.
				explicit LEB128InputIterator(ValueT Value, unsigned PadTo = 0)
				: Value(std::move(Value)), PadTo(PadTo) {
				// Initialize the iterator to the first LEB128-encoded byte.
				getNextByte();
				}

				/// Constructs a copy of \p Other.
				LEB128InputIterator(const LEB128InputIterator &Other) = default;

				/// Get the current LEB128-encoded byte.
				///@{
				const uint8_t &operator*() const {
				assert(CurrByte && "operator*() used on past-the-end LEB128InputIterator");
				return *CurrByte;
				}
				const uint8_t *operator->() const { return &operator*(); }
				///@}

				/// Increment the iterator to the next LEB128-encoded byte.
				mkitzanUnsubmitted Done Reply Inline Actions If `PadTo` is 0, may have unintentional underflow since it is `unsigned` (this may be your intention though, and if so it's worth a comment). Seeing it a lot in the other code. Maybe underflow is part of the plan here? mkitzan: If `PadTo` is 0, may have unintentional underflow since it is `unsigned` (this may be your…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions If `PadTo` is 0 then it's impossible for the check above of `if (Count < PadTo)` to pass, so we would never hit this line of code. This code should be safe at least. If there are other cases we should double-check them as well. nlguillemot: If `PadTo` is 0 then it's impossible for the check above of `if (Count < PadTo)` to pass, so we…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Avoided any potential underflow by switching the logic to `if (Count + 1 < PadTo)` nlguillemot: Avoided any potential underflow by switching the logic to `if (Count + 1 < PadTo)`
				///@{
				LEB128InputIterator &operator++() {
				assert(CurrByte && "operator++() used on past-the-end LEB128InputIterator");
				getNextByte();
				dsandersUnsubmitted Done Reply Inline Actions I think this belongs inside encodeNextByte(). This operator should essentially be something like: assert(CurrByte.hasValue() && "operator++() called on past-the-end LEB128InputIterator"); CurrByte = encodeNextByte(); return *this; where CurrByte is Optional<uint8_t>. As noted in another comment, I think special-casing the padding bytes isn't really needed. dsanders: I think this belongs inside encodeNextByte(). This operator should essentially be something…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions Moved the logic inside the core function and now the operator looks like what you described. nlguillemot: Moved the logic inside the core function and now the operator looks like what you described.
				return *this;
				}
				LEB128InputIterator operator++(int) {
				LEB128InputIterator Prev = *this;
				operator++();
				return Prev;
				}
				///@}

				/// Checks whether both iterators are equal. Two iterators are equal if both
				/// of them are end() iterators or both of them would generate the same
				/// sequence of outputs.
				///@{
				bool operator==(const LEB128InputIterator &Other) const {
				// Both are end() iterators, so they compare the same.
				// NOTE: This is similar to the design of istream_iterator, and it
				// simplifies the comparison logic below.
				if (!CurrByte && !Other.CurrByte)
				return true;
				// Otherwise, consider two iterators equal if they would generate the same
				// sequence of bytes.
				dsandersUnsubmitted Done Reply Inline Actions I see no harm in allowing two past-the-end iterators from different sequences to be equal but I wonder if it's necessary/useful. With the ULEBifier object it could give you an end iterator and then this operator would be a plain equality comparison. dsanders: I see no harm in allowing two past-the-end iterators from different sequences to be equal but I…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions The same design issue also happens with existing std input iterators like `std::istream_iterator`, so I think it's acceptable even if slightly odd. nlguillemot: The same design issue also happens with existing std input iterators like `std…
				nlguillemotAuthorUnsubmitted Done Reply Inline Actions After messing with it for a while I think it's simplest to keep it this way. Mainly because it simplifies the logic under it, since the member-by-member comparison doesn't have to worry about comparing the members of end iterators, or comparing the members of an end iterator with the members of a non-end iterator. nlguillemot: After messing with it for a while I think it's simplest to keep it this way. Mainly because it…
				return CurrByte == Other.CurrByte && Value == Other.Value &&
				PadTo == Other.PadTo && Count == Other.Count &&
				AllValueBitsEncoded == Other.AllValueBitsEncoded;
				}
				bool operator!=(const LEB128InputIterator &Other) const {
				return !operator==(Other);
				}
				///@}
				};

				/// Builds an iterator range that yields the LEB128 encoding of \p Value.
				///
				/// The begin iterator is constructed from \p Value followed by the forwarded
				/// \p Args; the end iterator is a value-initialized InputIteratorT.
				template <bool IsSigned, class ValueT,
				          class InputIteratorT = LEB128InputIterator<IsSigned, ValueT>,
				          class... ArgTs>
				auto makeLEB128InputRange(ValueT Value, ArgTs &&... Args) {
				  InputIteratorT First(std::move(Value), std::forward<ArgTs>(Args)...);
				  InputIteratorT Last = InputIteratorT();
				  return make_range(std::move(First), std::move(Last));
				}

				/// Shorthand for makeLEB128InputRange with IsSigned fixed to true (SLEB128).
				/// Every argument is forwarded unchanged.
				template <class... ArgTs> auto makeSLEB128InputRange(ArgTs &&... Args) {
				  constexpr bool EncodeAsSigned = true;
				  return makeLEB128InputRange<EncodeAsSigned>(std::forward<ArgTs>(Args)...);
				}

				// Convenience function for ULEB128 input iterator ranges.
				template <class... ArgTs> auto makeULEB128InputRange(ArgTs &&... Args) {
				return makeLEB128InputRange</* IsSigned */ false>(
				std::forward<ArgTs>(Args)...);
	}			}

				/// Utility function to encode a SLEB128 or ULEB128 value to a buffer. Returns
				/// the length in bytes of the encoded value.
				template <bool IsSigned, class ValueT,
				class InputIteratorT = LEB128InputIterator<IsSigned, ValueT>>
				unsigned encodeLEB128(const ValueT &Value, uint8_t *p, unsigned PadTo = 0) {
				uint8_t *orig_p = p;
				p = std::copy(InputIteratorT(Value, PadTo), InputIteratorT(), p);
	return (unsigned)(p - orig_p);			return (unsigned)(p - orig_p);
	}			}

	/// Utility function to encode a ULEB128 value to an output stream. Returns			/// Utility function to encode a SLEB128 or ULEB128 value to an output stream.
	/// the length in bytes of the encoded value.			/// Returns the length in bytes of the encoded value.
	inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS,			template <bool IsSigned, class ValueT,
				class InputIteratorT = LEB128InputIterator<IsSigned, ValueT>>
				unsigned encodeLEB128(const ValueT &Value, raw_ostream &OS,
	unsigned PadTo = 0) {			unsigned PadTo = 0) {
	unsigned Count = 0;			uint64_t TellBefore = OS.tell();
	do {			std::copy(InputIteratorT(Value, PadTo), InputIteratorT(),
	uint8_t Byte = Value & 0x7f;			raw_ostream_iterator<uint8_t>(OS));
	Value >>= 7;			return (unsigned)(OS.tell() - TellBefore);
	Count++;			}
	if (Value != 0 \|\| Count < PadTo)
	Byte \|= 0x80; // Mark this byte to show that more bytes will follow.
	OS << char(Byte);
	} while (Value != 0);

	// Pad with 0x80 and emit a null byte at the end.			/// Utility function to encode a SLEB128 value to an output stream. Returns
	if (Count < PadTo) {			/// the length in bytes of the encoded value.
	for (; Count < PadTo - 1; ++Count)			inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS,
	OS << '\x80';			unsigned PadTo = 0) {
	OS << '\x00';			return encodeLEB128</* IsSigned */ true>(Value, OS, PadTo);
	Count++;
	}			}
	return Count;
				/// Utility function to encode a SLEB128 value to a buffer. Returns
				/// the length in bytes of the encoded value.
				inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) {
				return encodeLEB128</* IsSigned */ true>(Value, p, PadTo);
	}			}

	/// Utility function to encode a ULEB128 value to a buffer. Returns			/// Utility function to encode a ULEB128 value to an output stream. Returns
	/// the length in bytes of the encoded value.			/// the length in bytes of the encoded value.
	inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,			inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS,
	unsigned PadTo = 0) {			unsigned PadTo = 0) {
	uint8_t *orig_p = p;			return encodeLEB128</* IsSigned */ false>(Value, OS, PadTo);
	unsigned Count = 0;
	do {
	uint8_t Byte = Value & 0x7f;
	Value >>= 7;
	Count++;
	if (Value != 0 \|\| Count < PadTo)
	Byte \|= 0x80; // Mark this byte to show that more bytes will follow.
	*p++ = Byte;
	} while (Value != 0);

	// Pad with 0x80 and emit a null byte at the end.
	if (Count < PadTo) {
	for (; Count < PadTo - 1; ++Count)
	*p++ = '\x80';
	*p++ = '\x00';
	}			}

	return (unsigned)(p - orig_p);			/// Utility function to encode a ULEB128 value to a buffer. Returns
				/// the length in bytes of the encoded value.
				inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, unsigned PadTo = 0) {
				return encodeLEB128</* IsSigned */ false>(Value, p, PadTo);
	}			}

	/// Utility function to decode a ULEB128 value.			/// Utility function to decode a ULEB128 value.
	inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,			inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
	const uint8_t *end = nullptr,			const uint8_t *end = nullptr,
	const char **error = nullptr) {			const char **error = nullptr) {
	const uint8_t *orig_p = p;			const uint8_t *orig_p = p;
	uint64_t Value = 0;			uint64_t Value = 0;
	▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines

llvm/utils/TableGen/FixedLenDecoderEmitter.cpp

Show First 20 Lines • Show All 683 Lines • ▼ Show 20 Lines	if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
FixupList &CurScope = TableInfo.FixupStack.back();		FixupList &CurScope = TableInfo.FixupStack.back();
// Resolve any NumToSkip fixups in the current scope.		// Resolve any NumToSkip fixups in the current scope.
resolveTableFixups(Table, CurScope, Table.size());		resolveTableFixups(Table, CurScope, Table.size());
CurScope.clear();		CurScope.clear();
PrevFilter = 0; // Don't re-process the filter's fallthrough.		PrevFilter = 0; // Don't re-process the filter's fallthrough.
} else {		} else {
Table.push_back(MCD::OPC_FilterValue);		Table.push_back(MCD::OPC_FilterValue);
// Encode and emit the value to filter against.		// Encode and emit the value to filter against.
uint8_t Buffer[16];		copy(makeULEB128InputRange(Filter.first), std::back_inserter(Table));
unsigned Len = encodeULEB128(Filter.first, Buffer);
Table.insert(Table.end(), Buffer, Buffer + Len);
// Reserve space for the NumToSkip entry. We'll backpatch the value		// Reserve space for the NumToSkip entry. We'll backpatch the value
// later.		// later.
PrevFilter = Table.size();		PrevFilter = Table.size();
Table.push_back(0);		Table.push_back(0);
Table.push_back(0);		Table.push_back(0);
Table.push_back(0);		Table.push_back(0);
}		}

▲ Show 20 Lines • Show All 579 Lines • ▼ Show 20 Lines	void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
// than a stream.		// than a stream.
raw_svector_ostream PS(Predicate);		raw_svector_ostream PS(Predicate);
unsigned I = 0;		unsigned I = 0;
emitPredicateMatch(PS, I, Opc);		emitPredicateMatch(PS, I, Opc);

// Figure out the index into the predicate table for the predicate just		// Figure out the index into the predicate table for the predicate just
// computed.		// computed.
unsigned PIdx = getPredicateIndex(TableInfo, PS.str());		unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
SmallString<16> PBytes;
raw_svector_ostream S(PBytes);
encodeULEB128(PIdx, S);

TableInfo.Table.push_back(MCD::OPC_CheckPredicate);		TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
// Predicate index		// Predicate index
for (unsigned i = 0, e = PBytes.size(); i != e; ++i)		copy(makeULEB128InputRange(PIdx), std::back_inserter(TableInfo.Table));
TableInfo.Table.push_back(PBytes[i]);
// Push location for NumToSkip backpatching.		// Push location for NumToSkip backpatching.
TableInfo.FixupStack.back().push_back(TableInfo.Table.size());		TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
}		}

void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,		void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
Show All 35 Lines	void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
bool NeedPositiveMask = PositiveMask.getBoolValue();		bool NeedPositiveMask = PositiveMask.getBoolValue();
bool NeedNegativeMask = NegativeMask.getBoolValue();		bool NeedNegativeMask = NegativeMask.getBoolValue();

if (!NeedPositiveMask && !NeedNegativeMask)		if (!NeedPositiveMask && !NeedNegativeMask)
return;		return;

TableInfo.Table.push_back(MCD::OPC_SoftFail);		TableInfo.Table.push_back(MCD::OPC_SoftFail);

SmallString<16> MaskBytes;
raw_svector_ostream S(MaskBytes);
if (NeedPositiveMask) {		if (NeedPositiveMask) {
encodeULEB128(PositiveMask.getZExtValue(), S);		copy(makeULEB128InputRange(PositiveMask.getZExtValue()),
for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)		std::back_inserter(TableInfo.Table));
TableInfo.Table.push_back(MaskBytes[i]);
} else		} else
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
if (NeedNegativeMask) {		if (NeedNegativeMask) {
MaskBytes.clear();		copy(makeULEB128InputRange(NegativeMask.getZExtValue()),
encodeULEB128(NegativeMask.getZExtValue(), S);		std::back_inserter(TableInfo.Table));
for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
TableInfo.Table.push_back(MaskBytes[i]);
} else		} else
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
}		}

// Emits table entries to decode the singleton.		// Emits table entries to decode the singleton.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,		void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
EncodingIDAndOpcode Opc) const {		EncodingIDAndOpcode Opc) const {
std::vector<unsigned> StartBits;		std::vector<unsigned> StartBits;
Show All 11 Lines	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
emitPredicateTableEntry(TableInfo, Opc.EncodingID);		emitPredicateTableEntry(TableInfo, Opc.EncodingID);

// Check any additional encoding fields needed.		// Check any additional encoding fields needed.
for (unsigned I = Size; I != 0; --I) {		for (unsigned I = Size; I != 0; --I) {
unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;		unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
TableInfo.Table.push_back(MCD::OPC_CheckField);		TableInfo.Table.push_back(MCD::OPC_CheckField);
TableInfo.Table.push_back(StartBits[I-1]);		TableInfo.Table.push_back(StartBits[I-1]);
TableInfo.Table.push_back(NumBits);		TableInfo.Table.push_back(NumBits);
uint8_t Buffer[16], *p;		copy(makeULEB128InputRange(FieldVals[I - 1]),
encodeULEB128(FieldVals[I-1], Buffer);		std::back_inserter(TableInfo.Table));
for (p = Buffer; *p >= 128 ; ++p)
TableInfo.Table.push_back(*p);
TableInfo.Table.push_back(*p);
// Push location for NumToSkip backpatching.		// Push location for NumToSkip backpatching.
TableInfo.FixupStack.back().push_back(TableInfo.Table.size());		TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
// The fixup is always 24-bits, so go ahead and allocate the space		// The fixup is always 24-bits, so go ahead and allocate the space
// in the table so all our relative position calculations work OK even		// in the table so all our relative position calculations work OK even
// before we fully resolve the real value here.		// before we fully resolve the real value here.
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
Show All 13 Lines	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// or not. This means OPC_Decode will be the final step in the decoding		// or not. This means OPC_Decode will be the final step in the decoding
// process. If it is not complete, then the Fail return code from the		// process. If it is not complete, then the Fail return code from the
// decoder method indicates that additional processing should be done to see		// decoder method indicates that additional processing should be done to see
// if there is any other instruction that also matches the bitpattern and		// if there is any other instruction that also matches the bitpattern and
// can decode it.		// can decode it.
TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :		TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
MCD::OPC_TryDecode);		MCD::OPC_TryDecode);
NumEncodingsSupported++;		NumEncodingsSupported++;
uint8_t Buffer[16], *p;		copy(makeULEB128InputRange(Opc.Opcode), std::back_inserter(TableInfo.Table));
encodeULEB128(Opc.Opcode, Buffer);
for (p = Buffer; *p >= 128 ; ++p)
TableInfo.Table.push_back(*p);
TableInfo.Table.push_back(*p);

SmallString<16> Bytes;
raw_svector_ostream S(Bytes);
encodeULEB128(DIdx, S);

// Decoder index		// Decoder index
for (unsigned i = 0, e = Bytes.size(); i != e; ++i)		copy(makeULEB128InputRange(DIdx), std::back_inserter(TableInfo.Table));
TableInfo.Table.push_back(Bytes[i]);

if (!HasCompleteDecoder) {		if (!HasCompleteDecoder) {
// Push location for NumToSkip backpatching.		// Push location for NumToSkip backpatching.
TableInfo.FixupStack.back().push_back(TableInfo.Table.size());		TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
// Allocate the space for the fixup.		// Allocate the space for the fixup.
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
TableInfo.Table.push_back(0);		TableInfo.Table.push_back(0);
▲ Show 20 Lines • Show All 1,106 Lines • Show Last 20 Lines