Index: lib/builtins/fp_mul_impl.inc
===================================================================
--- /dev/null
+++ lib/builtins/fp_mul_impl.inc
@@ -0,0 +1,116 @@
+//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float multiplication with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+COMPILER_RT_ABI static inline fp_t __mulXf3__(fp_t a, fp_t b) {
+    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+
+    rep_t aSignificand = toRep(a) & significandMask;
+    rep_t bSignificand = toRep(b) & significandMask;
+    int scale = 0;
+
+    /* Detect if a or b is zero, denormal, infinity, or NaN.*/
+    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+        const rep_t aAbs = toRep(a) & absMask;
+        const rep_t bAbs = toRep(b) & absMask;
+
+        /* NaN * anything = qNaN*/
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        /* anything * NaN = qNaN*/
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+        if (aAbs == infRep) {
+            /* infinity * non-zero = +/- infinity*/
+            if (bAbs) return fromRep(aAbs | productSign);
+            /* infinity * zero = NaN */
+            else return fromRep(qnanRep);
+        }
+
+        if (bAbs == infRep) {
+            /*? non-zero * infinity = +/- infinity*/
+            if (aAbs) return fromRep(bAbs | productSign);
+            /* zero * infinity = NaN */
+            else return fromRep(qnanRep);
+        }
+
+        /* zero * anything = +/- zero*/
+        if (!aAbs) return fromRep(productSign);
+        /* anything * zero = +/- zero*/
+        if (!bAbs) return fromRep(productSign);
+
+        /* one or both of a or b is denormal, the other (if applicable) is a
+           normal number.  Renormalize one or both of a and b, and set scale to
+           include the necessary exponent adjustment. */
+        if (aAbs < implicitBit) scale += normalize(&aSignificand);
+        if (bAbs < implicitBit) scale += normalize(&bSignificand);
+    }
+
+    /* Or in the implicit significand bit.  (If we fell through from the
+       denormal path it was already set by normalize( ), but setting it twice
+       won't hurt anything.) */
+    aSignificand |= implicitBit;
+    bSignificand |= implicitBit;
+
+    /* Get the significand of a*b.  Before multiplying the significands, shift
+       one of them left to left-align it in the field.  Thus, the product will
+       have (exponentBits + 2) integral digits, all but two of which must be
+       zero.  Normalizing this result is just a conditional left-shift by one
+       and bumping the exponent accordingly.*/
+    rep_t productHi, productLo;
+    wideMultiply(aSignificand, bSignificand << exponentBits,
+                 &productHi, &productLo);
+
+    int productExponent = aExponent + bExponent - exponentBias + scale;
+
+    /* Normalize the significand, adjust exponent if needed.*/
+    if (productHi & implicitBit) productExponent++;
+    else wideLeftShift(&productHi, &productLo, 1);
+
+    /* If we have overflowed the type, return +/- infinity.*/
+    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
+
+    if (productExponent <= 0) {
+        /* Result is denormal before rounding*/
+
+        /* If the result is so small that it just underflows to zero, return
+           a zero of the appropriate sign.  Mathematically there is no need to
+           handle this case separately, but we make it a special case to
+           simplify the shift logic. */
+        const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
+        if (shift >= typeWidth) return fromRep(productSign);
+
+        /* Otherwise, shift the significand of the result so that the round
+           bit is the high bit of productLo.*/
+        wideRightShiftWithSticky(&productHi, &productLo, shift);
+    }
+    else {
+        /* Result is normal before rounding; insert the exponent.*/
+        productHi &= significandMask;
+        productHi |= (rep_t)productExponent << significandBits;
+    }
+
+    /* Insert the sign of the result:*/
+    productHi |= productSign;
+
+    /* Final rounding.  The final result may overflow to infinity, or underflow
+       to zero, but those are the correct results in those cases.  We use the
+       default IEEE-754 round-to-nearest, ties-to-even rounding mode.*/
+    if (productLo > signBit) productHi++;
+    if (productLo == signBit) productHi += productHi & 1;
+    return fromRep(productHi);
+}
Index: lib/builtins/muldf3.c
===================================================================
--- lib/builtins/muldf3.c
+++ lib/builtins/muldf3.c
@@ -13,110 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DOUBLE_PRECISION
-#include "fp_lib.h"
+#include "fp_mul_impl.inc"
 
 ARM_EABI_FNALIAS(dmul, muldf3)
 
-COMPILER_RT_ABI fp_t
-__muldf3(fp_t a, fp_t b) {
-    
-    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
-    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
-    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
-    
-    rep_t aSignificand = toRep(a) & significandMask;
-    rep_t bSignificand = toRep(b) & significandMask;
-    int scale = 0;
-    
-    // Detect if a or b is zero, denormal, infinity, or NaN.
-    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-        
-        const rep_t aAbs = toRep(a) & absMask;
-        const rep_t bAbs = toRep(b) & absMask;
-        
-        // NaN * anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything * NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // infinity * non-zero = +/- infinity
-            if (bAbs) return fromRep(aAbs | productSign);
-            // infinity * zero = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        if (bAbs == infRep) {
-            // non-zero * infinity = +/- infinity
-            if (aAbs) return fromRep(bAbs | productSign);
-            // zero * infinity = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        // zero * anything = +/- zero
-        if (!aAbs) return fromRep(productSign);
-        // anything * zero = +/- zero
-        if (!bAbs) return fromRep(productSign);
-        
-        // one or both of a or b is denormal, the other (if applicable) is a
-        // normal number.  Renormalize one or both of a and b, and set scale to
-        // include the necessary exponent adjustment.
-        if (aAbs < implicitBit) scale += normalize(&aSignificand);
-        if (bAbs < implicitBit) scale += normalize(&bSignificand);
-    }
-    
-    // Or in the implicit significand bit.  (If we fell through from the
-    // denormal path it was already set by normalize( ), but setting it twice
-    // won't hurt anything.)
-    aSignificand |= implicitBit;
-    bSignificand |= implicitBit;
-    
-    // Get the significand of a*b.  Before multiplying the significands, shift
-    // one of them left to left-align it in the field.  Thus, the product will
-    // have (exponentBits + 2) integral digits, all but two of which must be
-    // zero.  Normalizing this result is just a conditional left-shift by one
-    // and bumping the exponent accordingly.
-    rep_t productHi, productLo;
-    wideMultiply(aSignificand, bSignificand << exponentBits,
-                 &productHi, &productLo);
-    
-    int productExponent = aExponent + bExponent - exponentBias + scale;
-    
-    // Normalize the significand, adjust exponent if needed.
-    if (productHi & implicitBit) productExponent++;
-    else wideLeftShift(&productHi, &productLo, 1);
-    
-    // If we have overflowed the type, return +/- infinity.
-    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
-    
-    if (productExponent <= 0) {
-        // Result is denormal before rounding
-        //
-        // If the result is so small that it just underflows to zero, return
-        // a zero of the appropriate sign.  Mathematically there is no need to
-        // handle this case separately, but we make it a special case to
-        // simplify the shift logic.
-        const unsigned int shift = 1U - (unsigned int)productExponent;
-        if (shift >= typeWidth) return fromRep(productSign);
-        
-        // Otherwise, shift the significand of the result so that the round
-        // bit is the high bit of productLo.
-        wideRightShiftWithSticky(&productHi, &productLo, shift);
-    }
-    
-    else {
-        // Result is normal before rounding; insert the exponent.
-        productHi &= significandMask;
-        productHi |= (rep_t)productExponent << significandBits;
-    }
-    
-    // Insert the sign of the result:
-    productHi |= productSign;
-    
-    // Final rounding.  The final result may overflow to infinity, or underflow
-    // to zero, but those are the correct results in those cases.  We use the
-    // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
-    if (productLo > signBit) productHi++;
-    if (productLo == signBit) productHi += productHi & 1;
-    return fromRep(productHi);
+fp_t __muldf3(fp_t a, fp_t b) {
+    return __mulXf3__(a, b);
 }
Index: lib/builtins/mulsf3.c
===================================================================
--- lib/builtins/mulsf3.c
+++ lib/builtins/mulsf3.c
@@ -13,100 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define SINGLE_PRECISION
-#include "fp_lib.h"
+#include "fp_mul_impl.inc"
 
 ARM_EABI_FNALIAS(fmul, mulsf3)
 
-COMPILER_RT_ABI fp_t
-__mulsf3(fp_t a, fp_t b) {
-    
-    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
-    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
-    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
-    
-    rep_t aSignificand = toRep(a) & significandMask;
-    rep_t bSignificand = toRep(b) & significandMask;
-    int scale = 0;
-    
-    // Detect if a or b is zero, denormal, infinity, or NaN.
-    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-        
-        const rep_t aAbs = toRep(a) & absMask;
-        const rep_t bAbs = toRep(b) & absMask;
-        
-        // NaN * anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything * NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // infinity * non-zero = +/- infinity
-            if (bAbs) return fromRep(aAbs | productSign);
-            // infinity * zero = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        if (bAbs == infRep) {
-            // non-zero * infinity = +/- infinity
-            if (aAbs) return fromRep(bAbs | productSign);
-            // zero * infinity = NaN
-            else return fromRep(qnanRep);
-        }
-        
-        // zero * anything = +/- zero
-        if (!aAbs) return fromRep(productSign);
-        // anything * zero = +/- zero
-        if (!bAbs) return fromRep(productSign);
-        
-        // one or both of a or b is denormal, the other (if applicable) is a
-        // normal number.  Renormalize one or both of a and b, and set scale to
-        // include the necessary exponent adjustment.
-        if (aAbs < implicitBit) scale += normalize(&aSignificand);
-        if (bAbs < implicitBit) scale += normalize(&bSignificand);
-    }
-    
-    // Or in the implicit significand bit.  (If we fell through from the
-    // denormal path it was already set by normalize( ), but setting it twice
-    // won't hurt anything.)
-    aSignificand |= implicitBit;
-    bSignificand |= implicitBit;
-    
-    // Get the significand of a*b.  Before multiplying the significands, shift
-    // one of them left to left-align it in the field.  Thus, the product will
-    // have (exponentBits + 2) integral digits, all but two of which must be
-    // zero.  Normalizing this result is just a conditional left-shift by one
-    // and bumping the exponent accordingly.
-    rep_t productHi, productLo;
-    wideMultiply(aSignificand, bSignificand << exponentBits,
-                 &productHi, &productLo);
-    
-    int productExponent = aExponent + bExponent - exponentBias + scale;
-    
-    // Normalize the significand, adjust exponent if needed.
-    if (productHi & implicitBit) productExponent++;
-    else wideLeftShift(&productHi, &productLo, 1);
-    
-    // If we have overflowed the type, return +/- infinity.
-    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
-    
-    if (productExponent <= 0) {
-        // Result is denormal before rounding, the exponent is zero and we
-        // need to shift the significand.
-        wideRightShiftWithSticky(&productHi, &productLo, 1U - (unsigned)productExponent);
-    }
-    
-    else {
-        // Result is normal before rounding; insert the exponent.
-        productHi &= significandMask;
-        productHi |= (rep_t)productExponent << significandBits;
-    }
-    
-    // Insert the sign of the result:
-    productHi |= productSign;
-    
-    // Final rounding.  The final result may overflow to infinity, or underflow
-    // to zero, but those are the correct results in those cases.
-    if (productLo > signBit) productHi++;
-    if (productLo == signBit) productHi += productHi & 1;
-    return fromRep(productHi);
+fp_t __mulsf3(fp_t a, fp_t b) {
+    return __mulXf3__(a, b);
 }
Index: lib/builtins/multf3.c
===================================================================
--- /dev/null
+++ lib/builtins/multf3.c
@@ -0,0 +1,25 @@
+//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#ifdef CRT_HAS_128BIT
+
+#define QUAD_PRECISION
+#include "fp_mul_impl.inc"
+
+fp_t __multf3(fp_t a, fp_t b) {
+    return __mulXf3__(a, b);
+}
+
+#endif
Index: test/builtins/Unit/multf3_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/multf3_test.c
@@ -0,0 +1,122 @@
+//===--------------- multf3_test.c - Test __multf3 ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __multf3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LP64__
+
+#include <stdlib.h>
+#include <limits.h>
+
+static inline long double fromRep(uint64_t hi, uint64_t lo)
+{
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    const union {long double f; __uint128_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline __uint128_t toRep(long double x)
+{
+    const union {long double f; __uint128_t i;} rep = {.f = x};
+    return rep.i;
+}
+
+static inline int compareResult(long double result,
+                                uint64_t expectedHi,
+                                uint64_t expectedLo)
+{
+    __uint128_t rep = toRep(result);
+    uint64_t hi = rep >> 64;
+    uint64_t lo = rep;
+
+    if (hi == expectedHi && lo == expectedLo){
+        return 0;
+    }
+    // test other posible NaN representation(signal NaN)
+    else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){
+        if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL &&
+            ((hi & 0xffffffffffffUL) > 0 || lo > 0)){
+            return 0;
+        }
+    }
+    return 1;
+}
+
+// Returns: a * b
+long double __multf3(long double a, long double b);
+
+int test__multf3(long double a, long double b,
+                 uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __multf3(a, b);
+    int ret = compareResult(x, expectedHi, expectedLo);
+
+    if (ret){
+        printf("error in test__multf3(%.20Lf, %.20Lf) = %.20Lf, "
+               "expected %.20Lf\n", a, b, x, fromRep(expectedHi, expectedLo));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LP64__
+    // qNaN * any = qNaN
+    if (test__multf3(fromRep(0x7fff800000000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff800000000000UL,
+                     0x0UL))
+        return 1;
+    // NaN * any = NaN
+    if (test__multf3(fromRep(0x7fff800030000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff800000000000UL,
+                     0x0UL))
+        return 1;
+    // inf * any = inf
+    if (test__multf3(fromRep(0x7fff000000000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff000000000000UL, 0x0UL))
+        return 1;
+    // any * any
+    if (test__multf3(0x1.2eab345678439abcdefea56782346p+5L,
+                     0x1.edcb34a235253948765432134674fp-1L,
+                     0x400423e7f9e3c9fcUL,
+                     0xd906c2c2a85777c4UL))
+        return 1;
+    if (test__multf3(0x1.353e45674d89abacc3a2ebf3ff4ffp-50L,
+                     0x1.ed8764648369535adf4be3214567fp-9L,
+                     0x3fc52a163c6223fcUL,
+                     0xc94c4bf0430768b4UL))
+        return 1;
+    if (test__multf3(0x1.234425696abcad34a35eeffefdcbap+456L,
+                     0x451.ed98d76e5d46e5f24323dff21ffp+600L,
+                     0x44293a91de5e0e94UL,
+                     0xe8ed17cc2cdf64acUL))
+        return 1;
+    if (test__multf3(0x1.4356473c82a9fabf2d22ace345defp-234L,
+                     0x1.eda98765476743ab21da23d45678fp-455L,
+                     0x3d4f37c1a3137caeUL,
+                     0xfc6807048bc2836aUL))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}