Index: lib/builtins/divdf3.c =================================================================== --- lib/builtins/divdf3.c +++ lib/builtins/divdf3.c @@ -67,7 +67,7 @@ if (!bAbs) return fromRep(infRep | quotientSign); - // one or both of a or b is denormal, the other (if applicable) is a + // One or both of a or b is denormal. The other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) @@ -76,9 +76,9 @@ scale -= normalize(&bSignificand); } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. aSignificand |= implicitBit; bSignificand |= implicitBit; int quotientExponent = aExponent - bExponent + scale; @@ -89,14 +89,14 @@ // is accurate to about 3.5 binary digits. const uint32_t q31b = bSignificand >> 21; uint32_t recip32 = UINT32_C(0x7504f333) - q31b; + // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2) // Now refine the reciprocal estimate using a Newton-Raphson iteration: // // x1 = x0 * (2 - x0 * b) // // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. + // with each iteration. uint32_t correction32; correction32 = -((uint64_t)recip32 * q31b >> 32); recip32 = (uint64_t)recip32 * correction32 >> 31; @@ -105,13 +105,12 @@ correction32 = -((uint64_t)recip32 * q31b >> 32); recip32 = (uint64_t)recip32 * correction32 >> 31; - // recip32 might have overflowed to exactly zero in the preceding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip32 downward by one bit. + // The reciprocal may have overflowed to zero if the upper half of b is + // exactly 1.0. This would sabatoge the full-width final stage of the + // computation that follows, so we adjust the reciprocal down by one bit. recip32--; - // We need to perform one more iteration to get us to 56 binary digits; + // We need to perform one more iteration to get us to 56 binary digits. // The last iteration needs to happen with extra precision. const uint32_t q63blo = bSignificand << 11; uint64_t correction, reciprocal; @@ -120,11 +119,10 @@ uint32_t cLo = correction; reciprocal = (uint64_t)recip32 * cHi + ((uint64_t)recip32 * cLo >> 32); - // We already adjusted the 32-bit estimate, now we need to adjust the final - // 64-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. + // Adjust the final 64-bit reciprocal estimate downward to ensure that it is + // strictly smaller than the infinitely precise exact reciprocal. Because + // the computation of the Newton-Raphson step is truncating at every step, + // this adjustment is small; most of the work is already done. reciprocal -= 2; // The numerical reciprocal is accurate to within 2^-56, lies in the @@ -134,7 +132,7 @@ // // 1. q < a/b // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-53 (actually, we have a + // 3. The error in q is bounded away from 2^-53 (actually, we have a // couple of bits to spare, but this is all we need). // We need a 64 x 64 multiply high to compute q, which isn't a basic @@ -151,7 +149,7 @@ // // 0 <= r < ulp(q)*b // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we // already have the correct result. The exact halfway case cannot occur. // We also take this time to right shift quotient if it falls in the [1,2) // range and adjust the exponent accordingly. @@ -191,13 +189,13 @@ else { const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit + // Clear the implicit bit. rep_t absResult = quotient & significandMask; - // Insert the exponent + // Insert the exponent. absResult |= (rep_t)writtenExponent << significandBits; - // Round + // Round. absResult += round; - // Insert the sign and return + // Insert the sign and return. const double result = fromRep(absResult | quotientSign); return result; } Index: lib/builtins/divsf3.c =================================================================== --- lib/builtins/divsf3.c +++ lib/builtins/divsf3.c @@ -67,7 +67,7 @@ if (!bAbs) return fromRep(infRep | quotientSign); - // one or both of a or b is denormal, the other (if applicable) is a + // One or both of a or b is denormal. The other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) @@ -76,12 +76,13 @@ scale -= normalize(&bSignificand); } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. aSignificand |= implicitBit; bSignificand |= implicitBit; int quotientExponent = aExponent - bExponent + scale; + // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2) // Align the significand of b as a Q31 fixed-point number in the range // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax @@ -95,8 +96,7 @@ // x1 = x0 * (2 - x0 * b) // // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. + // with each iteration. uint32_t correction; correction = -((uint64_t)reciprocal * q31b >> 32); reciprocal = (uint64_t)reciprocal * correction >> 31; @@ -105,12 +105,10 @@ correction = -((uint64_t)reciprocal * q31b >> 32); reciprocal = (uint64_t)reciprocal * correction >> 31; - // Exhaustive testing shows that the error in reciprocal after three steps - // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our - // expectations. We bump the reciprocal by a tiny value to force the error - // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to - // be specific). This also causes 1/1 to give a sensible approximation - // instead of zero (due to overflow). + // Adust the final 32-bit reciprocal estimate downward to ensure that it is + // strictly smaller than the infinitely precise exact reciprocal. Because + // the computation of the Newton-Raphson step is truncating at every step, + // this adjustment is small; most of the work is already done. reciprocal -= 2; // The numerical reciprocal is accurate to within 2^-28, lies in the @@ -120,11 +118,11 @@ // // 1. q < a/b // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) - // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // 3. The error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes // from the fact that we truncate the product, and the 2^27 term // is the error in the reciprocal of b scaled by the maximum // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded + // either q or nextafter(q) is the correctly rounded. rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32; // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). @@ -136,7 +134,7 @@ // // 0 <= r < ulp(q)*b // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we // already have the correct result. The exact halfway case cannot occur. // We also take this time to right shift quotient if it falls in the [1,2) // range and adjust the exponent accordingly. @@ -176,13 +174,13 @@ else { const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit + // Clear the implicit bit. rep_t absResult = quotient & significandMask; - // Insert the exponent + // Insert the exponent. absResult |= (rep_t)writtenExponent << significandBits; - // Round + // Round. absResult += round; - // Insert the sign and return + // Insert the sign and return. return fromRep(absResult | quotientSign); } } Index: lib/builtins/divtf3.c =================================================================== --- lib/builtins/divtf3.c +++ lib/builtins/divtf3.c @@ -68,7 +68,7 @@ if (!bAbs) return fromRep(infRep | quotientSign); - // one or both of a or b is denormal, the other (if applicable) is a + // One or both of a or b is denormal. The other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) @@ -77,9 +77,9 @@ scale -= normalize(&bSignificand); } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. aSignificand |= implicitBit; bSignificand |= implicitBit; int quotientExponent = aExponent - bExponent + scale; @@ -110,10 +110,9 @@ correction64 = -((rep_t)recip64 * q63b >> 64); recip64 = (rep_t)recip64 * correction64 >> 63; - // recip64 might have overflowed to exactly zero in the preceeding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip64 downward by one bit. + // The reciprocal may have overflowed to zero if the upper half of b is + // exactly 1.0. This would sabatoge the full-width final stage of the + // computation that follows, so we adjust the reciprocal down by one bit. recip64--; // We need to perform one more iteration to get us to 112 binary digits; @@ -137,11 +136,10 @@ reciprocal = r64cH + (r64cL >> 64); - // We already adjusted the 64-bit estimate, now we need to adjust the final - // 128-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. + // Adjust the final 128-bit reciprocal estimate downward to ensure that it + // is strictly smaller than the infinitely precise exact reciprocal. Because + // the computation of the Newton-Raphson step is truncating at every step, + // this adjustment is small; most of the work is already done. reciprocal -= 2; // The numerical reciprocal is accurate to within 2^-112, lies in the @@ -151,7 +149,7 @@ // // 1. q < a/b // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-113 (actually, we have a + // 3. The error in q is bounded away from 2^-113 (actually, we have a // couple of bits to spare, but this is all we need). // We need a 128 x 128 multiply high to compute q, which isn't a basic @@ -168,7 +166,7 @@ // // 0 <= r < ulp(q)*b // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we // already have the correct result. The exact halfway case cannot occur. // We also take this time to right shift quotient if it falls in the [1,2) // range and adjust the exponent accordingly. @@ -208,13 +206,13 @@ return fromRep(quotientSign); } else { const bool round = (residual << 1) >= bSignificand; - // Clear the implicit bit + // Clear the implicit bit. rep_t absResult = quotient & significandMask; - // Insert the exponent + // Insert the exponent. absResult |= (rep_t)writtenExponent << significandBits; - // Round + // Round. absResult += round; - // Insert the sign and return + // Insert the sign and return. const long double result = fromRep(absResult | quotientSign); return result; } Index: lib/builtins/fp_add_impl.inc =================================================================== --- lib/builtins/fp_add_impl.inc +++ lib/builtins/fp_add_impl.inc @@ -44,7 +44,7 @@ // zero + anything = anything if (!aAbs) { - // but we need to get the sign right for zero + zero + // We need to get the sign right for zero + zero. if (!bAbs) return fromRep(toRep(a) & toRep(b)); else @@ -76,14 +76,15 @@ bExponent = normalize(&bSignificand); // The sign of the result is the sign of the larger operand, a. If they - // have opposite signs, we are performing a subtraction; otherwise addition. + // have opposite signs, we are performing a subtraction. Otherwise, we + // perform addition. const rep_t resultSign = aRep & signBit; const bool subtraction = (aRep ^ bRep) & signBit; - // Shift the significands to give us round, guard and sticky, and or in the - // implicit significand bit. (If we fell through from the denormal path it + // Shift the significands to give us round, guard and sticky, and set the + // implicit significand bit. If we fell through from the denormal path it // was already set by normalize( ), but setting it twice won't hurt - // anything.) + // anything. aSignificand = (aSignificand | implicitBit) << 3; bSignificand = (bSignificand | implicitBit) << 3; @@ -95,7 +96,7 @@ const bool sticky = bSignificand << (typeWidth - align); bSignificand = bSignificand >> align | sticky; } else { - bSignificand = 1; // sticky; b is known to be non-zero. + bSignificand = 1; // Set the sticky bit. b is known to be non-zero. } } if (subtraction) { @@ -105,7 +106,7 @@ return fromRep(0); // If partial cancellation occured, we need to left-shift the result - // and adjust the exponent: + // and adjust the exponent. if (aSignificand < implicitBit << 3) { const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); aSignificand <<= shift; @@ -115,7 +116,7 @@ aSignificand += bSignificand; // If the addition carried up, we need to right-shift the result and - // adjust the exponent: + // adjust the exponent. if (aSignificand & implicitBit << 4) { const bool sticky = aSignificand & 1; aSignificand = aSignificand >> 1 | sticky; @@ -123,12 +124,12 @@ } } - // If we have overflowed the type, return +/- infinity: + // If we have overflowed the type, return +/- infinity. if (aExponent >= maxExponent) return fromRep(infRep | resultSign); if (aExponent <= 0) { - // Result is denormal before rounding; the exponent is zero and we + // The result is denormal before rounding. The exponent is zero and we // need to shift the significand. const int shift = 1 - aExponent; const bool sticky = aSignificand << (typeWidth - shift); @@ -146,8 +147,8 @@ result |= (rep_t)aExponent << significandBits; result |= resultSign; - // Final rounding. The result may overflow to infinity, but that is the - // correct result in that case. + // Perform the final rounding. The result may overflow to infinity, but + // that is the correct result in that case. if (roundGuardSticky > 0x4) result++; if (roundGuardSticky == 0x4) Index: lib/builtins/fp_extend_impl.inc =================================================================== --- lib/builtins/fp_extend_impl.inc +++ lib/builtins/fp_extend_impl.inc @@ -27,11 +27,11 @@ // // Finally, the following assumptions are made: // -// 1. floating-point types and integer types have the same endianness on the -// target platform +// 1. Floating-point types and integer types have the same endianness on the +// target platform. // -// 2. quiet NaNs, if supported, are indicated by the leading bit of the -// significand field being set +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. // //===----------------------------------------------------------------------===// @@ -59,7 +59,7 @@ const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; - // Break a into a sign and representation of the absolute value + // Break a into a sign and representation of the absolute value. const src_rep_t aRep = srcToRep(a); const src_rep_t aAbs = aRep & srcAbsMask; const src_rep_t sign = aRep & srcSignMask; @@ -101,7 +101,7 @@ absResult = 0; } - // Apply the signbit to (dst_t)abs(a). + // Apply the signbit to the absolute value. const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); return dstFromRep(result); } Index: lib/builtins/fp_fixint_impl.inc =================================================================== --- lib/builtins/fp_fixint_impl.inc +++ lib/builtins/fp_fixint_impl.inc @@ -16,7 +16,7 @@ static __inline fixint_t __fixint(fp_t a) { const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); const fixint_t fixint_min = -fixint_max - 1; - // Break a into sign, exponent, significand + // Break a into sign, exponent, significand parts. const rep_t aRep = toRep(a); const rep_t aAbs = aRep & absMask; const fixint_t sign = aRep & signBit ? -1 : 1; Index: lib/builtins/fp_fixuint_impl.inc =================================================================== --- lib/builtins/fp_fixuint_impl.inc +++ lib/builtins/fp_fixuint_impl.inc @@ -14,7 +14,7 @@ #include "fp_lib.h" static __inline fixuint_t __fixuint(fp_t a) { - // Break a into sign, exponent, significand + // Break a into sign, exponent, significand parts. const rep_t aRep = toRep(a); const rep_t aAbs = aRep & absMask; const int sign = aRep & signBit ? -1 : 1; Index: lib/builtins/fp_mul_impl.inc =================================================================== --- lib/builtins/fp_mul_impl.inc +++ lib/builtins/fp_mul_impl.inc @@ -46,7 +46,7 @@ } if (bAbs == infRep) { - //? non-zero * infinity = +/- infinity + // non-zero * infinity = +/- infinity if (aAbs) return fromRep(bAbs | productSign); // zero * infinity = NaN @@ -61,7 +61,7 @@ if (!bAbs) return fromRep(productSign); - // one or both of a or b is denormal, the other (if applicable) is a + // One or both of a or b is denormal. The other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) @@ -70,24 +70,21 @@ scale += normalize(&bSignificand); } - // Or in the implicit significand bit. (If we fell through from the + // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) + // won't hurt anything. aSignificand |= implicitBit; bSignificand |= implicitBit; - // Get the significand of a*b. Before multiplying the significands, shift - // one of them left to left-align it in the field. Thus, the product will - // have (exponentBits + 2) integral digits, all but two of which must be - // zero. Normalizing this result is just a conditional left-shift by one - // and bumping the exponent accordingly. + // Perform a basic multiplication on the significands. One of them must be + // shifted beforehand to be aligned with the exponent. rep_t productHi, productLo; wideMultiply(aSignificand, bSignificand << exponentBits, &productHi, &productLo); int productExponent = aExponent + bExponent - exponentBias + scale; - // Normalize the significand, adjust exponent if needed. + // Normalize the significand and adjust the exponent if needed. if (productHi & implicitBit) productExponent++; else @@ -98,10 +95,10 @@ return fromRep(infRep | productSign); if (productExponent <= 0) { - // Result is denormal before rounding + // The result is denormal before rounding. // // If the result is so small that it just underflows to zero, return - // a zero of the appropriate sign. Mathematically there is no need to + // zero with the appropriate sign. Mathematically, there is no need to // handle this case separately, but we make it a special case to // simplify the shift logic. const unsigned int shift = REP_C(1) - (unsigned int)productExponent; @@ -112,17 +109,17 @@ // bit is the high bit of productLo. wideRightShiftWithSticky(&productHi, &productLo, shift); } else { - // Result is normal before rounding; insert the exponent. + // The result is normal before rounding. Insert the exponent. productHi &= significandMask; productHi |= (rep_t)productExponent << significandBits; } - // Insert the sign of the result: + // Insert the sign of the result. productHi |= productSign; - // Final rounding. The final result may overflow to infinity, or underflow - // to zero, but those are the correct results in those cases. We use the - // default IEEE-754 round-to-nearest, ties-to-even rounding mode. + // Perform the final rounding. The final result may overflow to infinity, + // or underflow to zero, but those are the correct results in those cases. + // We use the default IEEE-754 round-to-nearest, ties-to-even rounding mode. if (productLo > signBit) productHi++; if (productLo == signBit) Index: lib/builtins/fp_trunc_impl.inc =================================================================== --- lib/builtins/fp_trunc_impl.inc +++ lib/builtins/fp_trunc_impl.inc @@ -28,11 +28,11 @@ // // Finally, the following assumptions are made: // -// 1. floating-point types and integer types have the same endianness on the -// target platform +// 1. Floating-point types and integer types have the same endianness on the +// target platform. // -// 2. quiet NaNs, if supported, are indicated by the leading bit of the -// significand field being set +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. // //===----------------------------------------------------------------------===// @@ -69,7 +69,7 @@ const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); const dst_rep_t dstNaNCode = dstQNaN - 1; - // Break a into a sign and representation of the absolute value + // Break a into a sign and representation of the absolute value. const src_rep_t aRep = srcToRep(a); const src_rep_t aAbs = aRep & srcAbsMask; const src_rep_t sign = aRep & srcSignMask; @@ -83,10 +83,10 @@ absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; const src_rep_t roundBits = aAbs & roundMask; - // Round to nearest + // Round to nearest. if (roundBits > halfway) absResult++; - // Ties to even + // Tie to even. else if (roundBits == halfway) absResult += absResult & 1; } else if (aAbs > srcInfinity) { @@ -126,7 +126,7 @@ } } - // Apply the signbit to (dst_t)abs(a). + // Apply the signbit to the absolute value. const dst_rep_t result = absResult | sign >> (srcBits - dstBits); return dstFromRep(result); }