Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -110,6 +110,8 @@ ------------------- - Implemented `WG14 N2674 The noreturn attribute `_. +- Implemented `WG14 N2763 Adding a fundamental type for N-bit integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2763.pdf>`_. +- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_. C++ Language Changes in Clang ----------------------------- Index: clang/include/clang/Basic/DiagnosticCommonKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticCommonKinds.td +++ clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -207,6 +207,12 @@ def err_size_t_literal_too_large: Error< "%select{signed |}0'size_t' literal is out of range of possible " "%select{signed |}0'size_t' values">; +def ext_c2x_bitint_suffix : ExtWarn< + "'_BitInt' suffix for literals is a C2x extension">, + InGroup<C2x>; +def warn_c2x_compat_bitint_suffix : Warning< + "'_BitInt' suffix for literals is incompatible with C standards before C2x">, + InGroup<CPre2xCompat>, DefaultIgnore; def err_integer_literal_too_large : Error< "integer literal is too large to be represented in any %select{signed |}0" "integer type">; Index: clang/include/clang/Lex/LiteralSupport.h =================================================================== --- clang/include/clang/Lex/LiteralSupport.h +++ clang/include/clang/Lex/LiteralSupport.h @@ -69,10 +69,11 @@ bool isImaginary : 1; // 1.0i bool isFloat16 : 1; // 1.0f16 bool isFloat128 : 1; // 1.0q - uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. - bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk + bool isBitInt : 1; // 1wb, 1uwb (C2x) + uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 
+ bool isFixedPointLiteral() const { return (saw_period || saw_exponent) && saw_fixed_point_suffix; @@ -120,6 +121,13 @@ /// calculating the digit sequence of the exponent. bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); + /// Get the digits that comprise the literal. This excludes any prefix or + /// suffix associated with the literal. + StringRef getLiteralDigits() const { + assert(!hadError && "cannot reliably get the literal digits with an error"); + return StringRef(DigitsBegin, SuffixBegin - DigitsBegin); + } + private: void ParseNumberStartingWithZero(SourceLocation TokLoc); Index: clang/lib/AST/StmtPrinter.cpp =================================================================== --- clang/lib/AST/StmtPrinter.cpp +++ clang/lib/AST/StmtPrinter.cpp @@ -1153,6 +1153,11 @@ bool isSigned = Node->getType()->isSignedIntegerType(); OS << toString(Node->getValue(), 10, isSigned); + if (Node->getType()->getAs<BitIntType>()) { + OS << (isSigned ? "wb" : "uwb"); + return; + } + // Emit suffixes. Integer literals are always a builtin integer type. switch (Node->getType()->castAs<BuiltinType>()->getKind()) { default: llvm_unreachable("Unexpected type for integer literal!"); Index: clang/lib/Lex/LiteralSupport.cpp =================================================================== --- clang/lib/Lex/LiteralSupport.cpp +++ clang/lib/Lex/LiteralSupport.cpp @@ -711,6 +711,7 @@ isFract = false; isAccum = false; hadError = false; + isBitInt = false; // This routine assumes that the range begin/end matches the regex for integer // and FP constants (specifically, the 'pp-number' regex), and assumes that @@ -895,6 +896,24 @@ if (isImaginary) break; // Cannot be repeated. isImaginary = true; continue; // Success. + case 'w': + case 'W': + if (isFPConstant) + break; // Invalid for floats. + if (HasSize) + break; // Invalid if we already have a size for the literal. + + // wb and WB are allowed, but a mixture of cases like Wb or wB is not. 
We + // explicitly do not support the suffix in C++ as an extension because a + // library-based UDL that resolves to a library type may be more + // appropriate there. + if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') || + (s[0] == 'W' && s[1] == 'B'))) { + isBitInt = true; + HasSize = true; + ++s; // Skip both characters (2nd char skipped on continue). + continue; // Success. + } } // If we reached here, there was an error or a ud-suffix. break; @@ -916,6 +935,7 @@ isFloat16 = false; isHalf = false; isImaginary = false; + isBitInt = false; MicrosoftInteger = 0; saw_fixed_point_suffix = false; isFract = false; @@ -1145,8 +1165,14 @@ // floating point constant, the radix will change to 10. Octal floating // point constants are not permitted (only decimal and hexadecimal). radix = 8; - DigitsBegin = s; + const char *PossibleNewDigitStart = s; s = SkipOctalDigits(s); + // When the value is 0 followed by a suffix (like 0wb), we want to leave 0 + // as the start of the digits. So if skipping octal digits does not skip + // anything, we leave the digit start where it was. + if (s != PossibleNewDigitStart) + DigitsBegin = PossibleNewDigitStart; + if (s == ThisTokEnd) return; // Done, simple octal number like 01234 Index: clang/lib/Lex/PPExpressions.cpp =================================================================== --- clang/lib/Lex/PPExpressions.cpp +++ clang/lib/Lex/PPExpressions.cpp @@ -331,6 +331,14 @@ : diag::ext_cxx2b_size_t_suffix : diag::err_cxx2b_size_t_suffix); + // 'wb/uwb' literals are a C2x feature. We explicitly do not support the + // suffix in C++ as an extension because a library-based UDL that resolves + // to a library type may be more appropriate there. + if (Literal.isBitInt) + PP.Diag(PeekTok, PP.getLangOpts().C2x + ? diag::warn_c2x_compat_bitint_suffix + : diag::ext_c2x_bitint_suffix); + // Parse the integer literal into Result. if (Literal.GetIntegerValue(Result.Val)) { // Overflow parsing integer literal. 
Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -3918,9 +3918,27 @@ : diag::ext_cxx2b_size_t_suffix : diag::err_cxx2b_size_t_suffix); - // Get the value in the widest-possible width. - unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth(); - llvm::APInt ResultVal(MaxWidth, 0); + // 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++, + // but we do not currently support the suffix in C++ mode because it's not + // entirely clear whether WG21 will prefer this suffix to return a library + // type such as std::bit_int instead of returning a _BitInt. + if (Literal.isBitInt && !getLangOpts().CPlusPlus) + PP.Diag(Tok.getLocation(), getLangOpts().C2x + ? diag::warn_c2x_compat_bitint_suffix + : diag::ext_c2x_bitint_suffix); + + // Get the value in the widest-possible width. What is "widest" depends on + // whether the literal is a bit-precise integer or not. For a bit-precise + // integer type, try to scan the source to determine how many bits are + // needed to represent the value. This may seem a bit expensive, but trying + // to get the integer value from an overly-wide APInt is *extremely* + // expensive, so the naive approach of assuming + // llvm::IntegerType::MAX_INT_BITS is a big performance hit. + unsigned BitsNeeded = + Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded( + Literal.getLiteralDigits(), Literal.getRadix()) + : Context.getTargetInfo().getIntMaxTWidth(); + llvm::APInt ResultVal(BitsNeeded, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. @@ -3952,6 +3970,32 @@ } } + // Bit-precise integer literals are automagically-sized based on the + // width required by the literal. + if (Literal.isBitInt) { + // The signed version has one more bit for the sign value. 
There are no + // zero-width bit-precise integers, even if the literal value is 0. + Width = std::max(ResultVal.getActiveBits(), 1u); + Width += Literal.isUnsigned ? 0u : 1u; + + // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH, + // and reset the type to the largest supported width. + unsigned int MaxBitIntWidth = + Context.getTargetInfo().getMaxBitIntWidth(); + if (Width > MaxBitIntWidth) { + Diag(Tok.getLocation(), diag::err_integer_literal_too_large) + << Literal.isUnsigned; + Width = MaxBitIntWidth; + } + + // Reset the result value to the smaller APInt and select the correct + // type to be used. Note, we zext even for signed values because the + // literal itself is always an unsigned value (a preceding - is a + // unary operator, not part of the literal). + ResultVal = ResultVal.zextOrTrunc(Width); + Ty = Context.getBitIntType(Literal.isUnsigned, Width); + } + // Check C++2b size_t literals. if (Literal.isSizeT) { assert(!Literal.MicrosoftInteger && Index: clang/test/AST/bitint-suffix.c =================================================================== --- /dev/null +++ clang/test/AST/bitint-suffix.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -std=c2x -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s + +// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void (void)' +void func(void) { + // Ensure that we calculate the correct type from the literal suffix. + + // Note: 0wb should create an _BitInt(2) because a signed bit-precise + // integer requires one bit for the sign and one bit for the value, + // at a minimum. 
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 zero_wb 'typeof (0wb)':'_BitInt(2)' + typedef __typeof__(0wb) zero_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)' + typedef __typeof__(-0wb) neg_zero_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 one_wb 'typeof (1wb)':'_BitInt(2)' + typedef __typeof__(1wb) one_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_one_wb 'typeof (-1wb)':'_BitInt(2)' + typedef __typeof__(-1wb) neg_one_wb; + + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)' + typedef __typeof__(0uwb) zero_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)' + typedef __typeof__(-0uwb) neg_zero_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)' + typedef __typeof__(1uwb) one_uwb; + + // Try a value that is too large to fit in [u]intmax_t. + + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:47> col:47 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)' + typedef __typeof__(18446744073709551616uwb) huge_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:46> col:46 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)' + typedef __typeof__(18446744073709551616wb) huge_wb; +} + +// Test the examples from the paper. 
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 from_paper 'void (void)' +void from_paper(void) { + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_three_wb 'typeof (-3wb)':'_BitInt(3)' + typedef __typeof__(-3wb) neg_three_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_three_hex_wb 'typeof (-3wb)':'_BitInt(3)' + typedef __typeof__(-0x3wb) neg_three_hex_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 three_wb 'typeof (3wb)':'_BitInt(3)' + typedef __typeof__(3wb) three_wb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 three_uwb 'typeof (3uwb)':'unsigned _BitInt(2)' + typedef __typeof__(3uwb) three_uwb; + // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_three_uwb 'typeof (-3uwb)':'unsigned _BitInt(2)' + typedef __typeof__(-3uwb) neg_three_uwb; +} + Index: clang/test/Lexer/bitint-constants-compat.c =================================================================== --- /dev/null +++ clang/test/Lexer/bitint-constants-compat.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s +// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s +// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s + +#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \ + compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \ + cpp-error {{invalid suffix 'uwb' on integer constant}} +#endif + +void func(void) { + 18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \ + compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \ + cpp-error {{invalid suffix 'wb' on integer constant}} +} Index: clang/test/Lexer/bitint-constants.c =================================================================== --- /dev/null +++ clang/test/Lexer/bitint-constants.c @@ -0,0 +1,144 @@ +// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify 
-Wno-unused %s + +// Test that the preprocessor behavior makes sense. +#if 1wb != 1 +#error "wb suffix must be recognized by preprocessor" +#endif +#if 1uwb != 1 +#error "uwb suffix must be recognized by preprocessor" +#endif +#if !(-1wb < 0) +#error "wb suffix must be interpreted as signed" +#endif +#if !(-1uwb > 0) +#error "uwb suffix must be interpreted as unsigned" +#endif + +#if 18446744073709551615uwb != 18446744073709551615ULL +#error "expected the max value for uintmax_t to compare equal" +#endif + +// Test that the preprocessor gives appropriate diagnostics when the +// literal value is larger than what can be stored in a [u]intmax_t. +#if 18446744073709551616wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}} +#error "never expected to get here due to error" +#endif +#if 18446744073709551616uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}} +#error "never expected to get here due to error" +#endif + +// Despite using a bit-precise integer, this is expected to overflow +// because all preprocessor arithmetic is done in [u]intmax_t, so this +// should result in the value 0. +#if 18446744073709551615uwb + 1 != 0ULL +#error "expected modulo arithmetic with uintmax_t width" +#endif + +// Because this bit-precise integer is signed, it will also overflow, +// but Clang handles that by converting to uintmax_t instead of +// intmax_t. +#if 18446744073709551615wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}} +#error "expected modulo arithmetic with uintmax_t width" +#endif + +// Test that just because the preprocessor can't figure out the bit +// width doesn't mean we can't form the constant, it just means we +// can't use the value in a preprocessor conditional. +unsigned _BitInt(65) Val = 18446744073709551616uwb; + +void ValidSuffix(void) { + // Decimal literals. 
+ 1wb; + 1WB; + -1wb; + _Static_assert((int)1wb == 1, "not 1?"); + _Static_assert((int)-1wb == -1, "not -1?"); + + 1uwb; + 1uWB; + 1Uwb; + 1UWB; + _Static_assert((unsigned int)1uwb == 1u, "not 1?"); + + 1'2wb; + 1'2uwb; + _Static_assert((int)1'2wb == 12, "not 12?"); + _Static_assert((unsigned int)1'2uwb == 12u, "not 12?"); + + // Hexadecimal literals. + 0x1wb; + 0x1uwb; + 0x0'1'2'3wb; + 0xA'B'c'duwb; + _Static_assert((int)0x0'1'2'3wb == 0x0123, "not 0x0123"); + _Static_assert((unsigned int)0xA'B'c'duwb == 0xABCDu, "not 0xABCD"); + + // Binary literals. + 0b1wb; + 0b1uwb; + 0b1'0'1'0'0'1wb; + 0b0'1'0'1'1'0uwb; + _Static_assert((int)0b1wb == 1, "not 1?"); + _Static_assert((unsigned int)0b1uwb == 1u, "not 1?"); + + // Octal literals. + 01wb; + 01uwb; + 0'6'0wb; + 0'0'1uwb; + 0wbu; + 0WBu; + 0wbU; + 0WBU; + 0wb; + _Static_assert((int)0wb == 0, "not 0?"); + _Static_assert((unsigned int)0wbu == 0u, "not 0?"); + + // Imaginary or Complex. These are allowed because _Complex can work with any + // integer type, and that includes _BitInt. + 1iwb; + 1wbj; +} + +void InvalidSuffix(void) { + // Can't mix the case of wb or WB, and can't rearrange the letters. + 0wB; // expected-error {{invalid suffix 'wB' on integer constant}} + 0Wb; // expected-error {{invalid suffix 'Wb' on integer constant}} + 0bw; // expected-error {{invalid digit 'b' in octal constant}} + 0BW; // expected-error {{invalid digit 'B' in octal constant}} + + // Trailing digit separators should still diagnose. + 1'2'wb; // expected-error {{digit separator cannot appear at end of digit sequence}} + 1'2'uwb; // expected-error {{digit separator cannot appear at end of digit sequence}} + + // Long. + 1lwb; // expected-error {{invalid suffix}} + 1wbl; // expected-error {{invalid suffix}} + 1luwb; // expected-error {{invalid suffix}} + 1ulwb; // expected-error {{invalid suffix}} + + // Long long. + 1llwb; // expected-error {{invalid suffix}} + 1uwbll; // expected-error {{invalid suffix}} + + // Floating point. 
+ 0.1wb; // expected-error {{invalid suffix}} + 0.1fwb; // expected-error {{invalid suffix}} + + // Repetitive suffix. + 1wbwb; // expected-error {{invalid suffix}} + 1uwbuwb; // expected-error {{invalid suffix}} + 1wbuwb; // expected-error {{invalid suffix}} + 1uwbwb; // expected-error {{invalid suffix}} +} + +void ValidSuffixInvalidValue(void) { + // This is a valid suffix, but the value is larger than one that fits within + // the width of BITINT_MAXWIDTH. When this value changes in the future, the + // test cases should pick a new value that can't be represented by a _BitInt, + // but also add a test case that a 129-bit literal still behaves as-expected. + _Static_assert(__BITINT_MAXWIDTH__ <= 128, + "Need to pick a bigger constant for the test case below."); + 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1wb; // expected-error {{integer literal is too large to be represented in any signed integer type}} + 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1uwb; // expected-error {{integer literal is too large to be represented in any integer type}} +} Index: llvm/include/llvm/ADT/APInt.h =================================================================== --- llvm/include/llvm/ADT/APInt.h +++ llvm/include/llvm/ADT/APInt.h @@ -1506,6 +1506,11 @@ /// equivalent of the string given by \p str. static unsigned getBitsNeeded(StringRef str, uint8_t radix); + /// Get the bits that are sufficient to represent the string value. This may + /// over estimate the amount of bits required, but it does not require + /// parsing the value in the string. + static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix); + /// The APInt version of the countLeadingZeros functions in /// MathExtras.h. 
/// Index: llvm/lib/Support/APInt.cpp =================================================================== --- llvm/lib/Support/APInt.cpp +++ llvm/lib/Support/APInt.cpp @@ -502,12 +502,51 @@ return retBits; } +unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) { + assert(!Str.empty() && "Invalid string length"); + size_t StrLen = Str.size(); + + // Each computation below needs to know if it's negative. + unsigned IsNegative = false; + if (Str[0] == '-' || Str[0] == '+') { + IsNegative = Str[0] == '-'; + StrLen--; + assert(StrLen && "String is only a sign, needs a value."); + } + + // For radixes of power-of-two values, the bits required is accurately and + // easily computed. + if (Radix == 2) + return StrLen + IsNegative; + if (Radix == 8) + return StrLen * 3 + IsNegative; + if (Radix == 16) + return StrLen * 4 + IsNegative; + + // Compute a sufficient number of bits that is always large enough but might + // be too large. This avoids the assertion in the constructor. This + // calculation doesn't work appropriately for the numbers 0-9, so just use 4 + // bits in that case. + if (Radix == 10) + return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative; + + assert(Radix == 36); + return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative; +} + unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { - assert(!str.empty() && "Invalid string length"); - assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || - radix == 36) && - "Radix should be 2, 8, 10, 16, or 36!"); + // Compute a sufficient number of bits that is always large enough but might + // be too large. + unsigned sufficient = getSufficientBitsNeeded(str, radix); + + // For bases 2, 8, and 16, the sufficient number of bits is exact and we can + // return the value directly. For bases 10 and 36, we need to do extra work. + if (radix == 2 || radix == 8 || radix == 16) + return sufficient; + // This is grossly inefficient but accurate. 
We could probably do something + // with a computation of roughly slen*64/20 and then adjust by the value of + // the first few digits. But, I'm not sure how accurate that could be. size_t slen = str.size(); // Each computation below needs to know if it's negative. @@ -519,28 +558,6 @@ assert(slen && "String is only a sign, needs a value."); } - // For radixes of power-of-two values, the bits required is accurately and - // easily computed - if (radix == 2) - return slen + isNegative; - if (radix == 8) - return slen * 3 + isNegative; - if (radix == 16) - return slen * 4 + isNegative; - - // FIXME: base 36 - - // This is grossly inefficient but accurate. We could probably do something - // with a computation of roughly slen*64/20 and then adjust by the value of - // the first few digits. But, I'm not sure how accurate that could be. - - // Compute a sufficient number of bits that is always large enough but might - // be too large. This avoids the assertion in the constructor. This - // calculation doesn't work appropriately for the numbers 0-9, so just use 4 - // bits in that case. - unsigned sufficient - = radix == 10? (slen == 1 ? 4 : slen * 64/18) - : (slen == 1 ? 7 : slen * 16/3); // Convert to the actual binary value. APInt tmp(sufficient, StringRef(p, slen), radix);