Index: lib/builtins/fp_mul.h
===================================================================
--- /dev/null
+++ lib/builtins/fp_mul.h
@@ -0,0 +1,132 @@
+//===---- lib/fp_mul.h - floating point multiplication ------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float multiplication with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_MUL_HEADER
+#define FP_MUL_HEADER
+
+#include "fp_lib.h"
+
+#define __MULFP(precision) \
+static inline fp_t __mul##precision##f3__(fp_t a, fp_t b) { \
+    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; \
+    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; \
+    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; \
+    \
+    rep_t aSignificand = toRep(a) & significandMask; \
+    rep_t bSignificand = toRep(b) & significandMask; \
+    int scale = 0; \
+    \
+    /* Detect if a or b is zero, denormal, infinity, or NaN.*/ \
+    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { \
+        \
+        const rep_t aAbs = toRep(a) & absMask; \
+        const rep_t bAbs = toRep(b) & absMask; \
+        \
+        /* NaN * anything = qNaN*/ \
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit); \
+        /* anything * NaN = qNaN*/ \
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit); \
+        \
+        if (aAbs == infRep) { \
+            /* infinity * non-zero = +/- infinity*/ \
+            if (bAbs) return fromRep(aAbs | productSign); \
+            /* infinity * zero = NaN */ \
+            else return fromRep(qnanRep); \
+        } \
+        \
+        if (bAbs == infRep) { \
+            /*? non-zero * infinity = +/- infinity*/ \
+            if (aAbs) return fromRep(bAbs | productSign); \
+            /* zero * infinity = NaN */ \
+            else return fromRep(qnanRep); \
+        } \
+        \
+        /* zero * anything = +/- zero*/ \
+        if (!aAbs) return fromRep(productSign); \
+        /* anything * zero = +/- zero*/ \
+        if (!bAbs) return fromRep(productSign); \
+        \
+        /* one or both of a or b is denormal, the other (if applicable) is a
+           normal number.  Renormalize one or both of a and b, and set scale to
+           include the necessary exponent adjustment. */ \
+        if (aAbs < implicitBit) scale += normalize(&aSignificand); \
+        if (bAbs < implicitBit) scale += normalize(&bSignificand); \
+    } \
+    \
+    /* Or in the implicit significand bit.  (If we fell through from the
+       denormal path it was already set by normalize( ), but setting it twice
+       won't hurt anything.) */ \
+    aSignificand |= implicitBit; \
+    bSignificand |= implicitBit; \
+    \
+    /* Get the significand of a*b.  Before multiplying the significands, shift
+       one of them left to left-align it in the field.  Thus, the product will
+       have (exponentBits + 2) integral digits, all but two of which must be
+       zero.  Normalizing this result is just a conditional left-shift by one
+       and bumping the exponent accordingly.*/ \
+    rep_t productHi, productLo; \
+    wideMultiply(aSignificand, bSignificand << exponentBits, \
+                 &productHi, &productLo); \
+    \
+    int productExponent = aExponent + bExponent - exponentBias + scale; \
+    \
+    /* Normalize the significand, adjust exponent if needed.*/ \
+    if (productHi & implicitBit) productExponent++; \
+    else wideLeftShift(&productHi, &productLo, 1); \
+    \
+    /* If we have overflowed the type, return +/- infinity.*/ \
+    if (productExponent >= maxExponent) return fromRep(infRep | productSign); \
+    \
+    if (productExponent <= 0) { \
+        /* Result is denormal before rounding*/ \
+        \
+        /* If the result is so small that it just underflows to zero, return
+           a zero of the appropriate sign.  Mathematically there is no need to
+           handle this case separately, but we make it a special case to
+           simplify the shift logic. */ \
+        const unsigned int shift = REP_C(1) - (unsigned int)productExponent; \
+        if (shift >= typeWidth) return fromRep(productSign); \
+        \
+        /* Otherwise, shift the significand of the result so that the round
+           bit is the high bit of productLo.*/ \
+        wideRightShiftWithSticky(&productHi, &productLo, shift); \
+    } \
+    else { \
+        /* Result is normal before rounding; insert the exponent.*/ \
+        productHi &= significandMask; \
+        productHi |= (rep_t)productExponent << significandBits; \
+    } \
+    \
+    /* Insert the sign of the result:*/ \
+    productHi |= productSign; \
+    \
+    /* Final rounding.  The final result may overflow to infinity, or underflow
+       to zero, but those are the correct results in those cases.  We use the
+       default IEEE-754 round-to-nearest, ties-to-even rounding mode.*/ \
+    if (productLo > signBit) productHi++; \
+    if (productLo == signBit) productHi += productHi & 1; \
+    return fromRep(productHi); \
+}
+
+#if defined SINGLE_PRECISION
+    COMPILER_RT_ABI __MULFP(s);
+#elif defined DOUBLE_PRECISION
+    COMPILER_RT_ABI __MULFP(d);
+#elif defined QUAD_PRECISION
+    COMPILER_RT_ABI __MULFP(t);
+#else
+#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+#endif
+
+#endif // FP_MUL_HEADER
Index: lib/builtins/muldf3.c
===================================================================
--- lib/builtins/muldf3.c
+++ lib/builtins/muldf3.c
@@ -13,110 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DOUBLE_PRECISION
-#include "fp_lib.h"
+#include "fp_mul.h"
 
 ARM_EABI_FNALIAS(dmul, muldf3)
 
-COMPILER_RT_ABI fp_t
-__muldf3(fp_t a, fp_t b) {
-    
-    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
-    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
-    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
-    
-    rep_t aSignificand = toRep(a) & significandMask;
-    rep_t bSignificand = toRep(b) & significandMask;
-    int scale = 0;
-    
-    // Detect if a or b is zero, denormal, infinity, or NaN.
-    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-        
-        const rep_t aAbs = toRep(a) & absMask;
-        const rep_t bAbs = toRep(b) & absMask;
-        
-        // NaN * anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything * NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // infinity * non-zero = +/- infinity
-            if (bAbs) return fromRep(aAbs | productSign);
-            // infinity * zero = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        if (bAbs == infRep) {
-            // non-zero * infinity = +/- infinity
-            if (aAbs) return fromRep(bAbs | productSign);
-            // zero * infinity = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        // zero * anything = +/- zero
-        if (!aAbs) return fromRep(productSign);
-        // anything * zero = +/- zero
-        if (!bAbs) return fromRep(productSign);
-        
-        // one or both of a or b is denormal, the other (if applicable) is a
-        // normal number.  Renormalize one or both of a and b, and set scale to
-        // include the necessary exponent adjustment.
-        if (aAbs < implicitBit) scale += normalize(&aSignificand);
-        if (bAbs < implicitBit) scale += normalize(&bSignificand);
-    }
-    
-    // Or in the implicit significand bit.  (If we fell through from the
-    // denormal path it was already set by normalize( ), but setting it twice
-    // won't hurt anything.)
-    aSignificand |= implicitBit;
-    bSignificand |= implicitBit;
-    
-    // Get the significand of a*b.  Before multiplying the significands, shift
-    // one of them left to left-align it in the field.  Thus, the product will
-    // have (exponentBits + 2) integral digits, all but two of which must be
-    // zero.  Normalizing this result is just a conditional left-shift by one
-    // and bumping the exponent accordingly.
-    rep_t productHi, productLo;
-    wideMultiply(aSignificand, bSignificand << exponentBits,
-                 &productHi, &productLo);
-    
-    int productExponent = aExponent + bExponent - exponentBias + scale;
-    
-    // Normalize the significand, adjust exponent if needed.
-    if (productHi & implicitBit) productExponent++;
-    else wideLeftShift(&productHi, &productLo, 1);
-    
-    // If we have overflowed the type, return +/- infinity.
-    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
-    
-    if (productExponent <= 0) {
-        // Result is denormal before rounding
-        //
-        // If the result is so small that it just underflows to zero, return
-        // a zero of the appropriate sign.  Mathematically there is no need to
-        // handle this case separately, but we make it a special case to
-        // simplify the shift logic.
-        const unsigned int shift = 1U - (unsigned int)productExponent;
-        if (shift >= typeWidth) return fromRep(productSign);
-        
-        // Otherwise, shift the significand of the result so that the round
-        // bit is the high bit of productLo.
-        wideRightShiftWithSticky(&productHi, &productLo, shift);
-    }
-    
-    else {
-        // Result is normal before rounding; insert the exponent.
-        productHi &= significandMask;
-        productHi |= (rep_t)productExponent << significandBits;
-    }
-    
-    // Insert the sign of the result:
-    productHi |= productSign;
-    
-    // Final rounding.  The final result may overflow to infinity, or underflow
-    // to zero, but those are the correct results in those cases.  We use the
-    // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
-    if (productLo > signBit) productHi++;
-    if (productLo == signBit) productHi += productHi & 1;
-    return fromRep(productHi);
+fp_t __muldf3(fp_t a, fp_t b) {
+    return __muldf3__(a, b);
 }
Index: lib/builtins/mulsf3.c
===================================================================
--- lib/builtins/mulsf3.c
+++ lib/builtins/mulsf3.c
@@ -13,100 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define SINGLE_PRECISION
-#include "fp_lib.h"
+#include "fp_mul.h"
 
 ARM_EABI_FNALIAS(fmul, mulsf3)
 
-COMPILER_RT_ABI fp_t
-__mulsf3(fp_t a, fp_t b) {
-    
-    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
-    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
-    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
-    
-    rep_t aSignificand = toRep(a) & significandMask;
-    rep_t bSignificand = toRep(b) & significandMask;
-    int scale = 0;
-    
-    // Detect if a or b is zero, denormal, infinity, or NaN.
-    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-        
-        const rep_t aAbs = toRep(a) & absMask;
-        const rep_t bAbs = toRep(b) & absMask;
-        
-        // NaN * anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything * NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // infinity * non-zero = +/- infinity
-            if (bAbs) return fromRep(aAbs | productSign);
-            // infinity * zero = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        if (bAbs == infRep) {
-            // non-zero * infinity = +/- infinity
-            if (aAbs) return fromRep(bAbs | productSign);
-            // zero * infinity = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        // zero * anything = +/- zero
-        if (!aAbs) return fromRep(productSign);
-        // anything * zero = +/- zero
-        if (!bAbs) return fromRep(productSign);
-        
-        // one or both of a or b is denormal, the other (if applicable) is a
-        // normal number.  Renormalize one or both of a and b, and set scale to
-        // include the necessary exponent adjustment.
-        if (aAbs < implicitBit) scale += normalize(&aSignificand);
-        if (bAbs < implicitBit) scale += normalize(&bSignificand);
-    }
-    
-    // Or in the implicit significand bit.  (If we fell through from the
-    // denormal path it was already set by normalize( ), but setting it twice
-    // won't hurt anything.)
-    aSignificand |= implicitBit;
-    bSignificand |= implicitBit;
-    
-    // Get the significand of a*b.  Before multiplying the significands, shift
-    // one of them left to left-align it in the field.  Thus, the product will
-    // have (exponentBits + 2) integral digits, all but two of which must be
-    // zero.  Normalizing this result is just a conditional left-shift by one
-    // and bumping the exponent accordingly.
-    rep_t productHi, productLo;
-    wideMultiply(aSignificand, bSignificand << exponentBits,
-                 &productHi, &productLo);
-    
-    int productExponent = aExponent + bExponent - exponentBias + scale;
-    
-    // Normalize the significand, adjust exponent if needed.
-    if (productHi & implicitBit) productExponent++;
-    else wideLeftShift(&productHi, &productLo, 1);
-    
-    // If we have overflowed the type, return +/- infinity.
-    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
-    
-    if (productExponent <= 0) {
-        // Result is denormal before rounding, the exponent is zero and we
-        // need to shift the significand.
-        wideRightShiftWithSticky(&productHi, &productLo, 1U - (unsigned)productExponent);
-    }
-    
-    else {
-        // Result is normal before rounding; insert the exponent.
-        productHi &= significandMask;
-        productHi |= (rep_t)productExponent << significandBits;
-    }
-    
-    // Insert the sign of the result:
-    productHi |= productSign;
-    
-    // Final rounding.  The final result may overflow to infinity, or underflow
-    // to zero, but those are the correct results in those cases.
-    if (productLo > signBit) productHi++;
-    if (productLo == signBit) productHi += productHi & 1;
-    return fromRep(productHi);
+fp_t __mulsf3(fp_t a, fp_t b) {
+    return __mulsf3__(a, b);
 }
Index: lib/builtins/multf3.c
===================================================================
--- /dev/null
+++ lib/builtins/multf3.c
@@ -0,0 +1,25 @@
+//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+// __uint128_t is undefined in 32-bit machine toolchain
+#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+
+#define QUAD_PRECISION
+#include "fp_mul.h"
+
+fp_t __multf3(fp_t a, fp_t b) {
+    return __multf3__(a, b);
+}
+
+#endif
Index: test/builtins/Unit/multf3_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/multf3_test.c
@@ -0,0 +1,62 @@
+//===--------------- multf3_test.c - Test __multf3 ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __multf3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+
+// Returns: a * b
+long double __multf3(long double a, long double b);
+
+int test__multf3(long double a, long double b)
+{
+    long double x = __multf3(a, b);
+    long double expected = a * b;
+    if (x != expected)
+    {
+        printf("error in test__multf3(%.20Lf, %.20Lf) = %.20Lf, "
+               "expected %.20Lf\n", a, b, x, expected);
+    }
+    return x != expected;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+    // Since NaN can't be compared with NaN, we don't test the
+    // cases that produce NaN result
+
+    // inf * any = inf
+    if (test__multf3(0x1.0p+16384L, 0x1.23456789abcdefp+5L))
+        return 1;
+    // any * any
+    if (test__multf3(0x1.23456789abcdefp+5L, 0x1.edcba987654321fp-1L))
+        return 1;
+    if (test__multf3(0x1.23456789abcdefp-50L, 0x1.edcba987654321fp-9L))
+        return 1;
+    if (test__multf3(0x1.23456789abcdefp+456L, 0x1.edcba987654321fp+600L))
+        return 1;
+    if (test__multf3(0x1.23456789abcdefp-234L, 0x1.edcba987654321fp-455L))
+        return 1;
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}