Index: lib/builtins/adddf3.c
===================================================================
--- lib/builtins/adddf3.c
+++ lib/builtins/adddf3.c
@@ -13,140 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DOUBLE_PRECISION
-#include "fp_lib.h"
+#include "fp_add_impl.inc"
 
 ARM_EABI_FNALIAS(dadd, adddf3)
 
-COMPILER_RT_ABI fp_t
-__adddf3(fp_t a, fp_t b) {
-    
-    rep_t aRep = toRep(a);
-    rep_t bRep = toRep(b);
-    const rep_t aAbs = aRep & absMask;
-    const rep_t bAbs = bRep & absMask;
-    
-    // Detect if a or b is zero, infinity, or NaN.
-    if (aAbs - 1U >= infRep - 1U || bAbs - 1U >= infRep - 1U) {
-        
-        // NaN + anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything + NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // +/-infinity + -/+infinity = qNaN
-            if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
-            // +/-infinity + anything remaining = +/- infinity
-            else return a;
-        }
-        
-        // anything remaining + +/-infinity = +/-infinity
-        if (bAbs == infRep) return b;
-        
-        // zero + anything = anything
-        if (!aAbs) {
-            // but we need to get the sign right for zero + zero
-            if (!bAbs) return fromRep(toRep(a) & toRep(b));
-            else return b;
-        }
-        
-        // anything + zero = anything
-        if (!bAbs) return a;
-    }
-    
-    // Swap a and b if necessary so that a has the larger absolute value.
-    if (bAbs > aAbs) {
-        const rep_t temp = aRep;
-        aRep = bRep;
-        bRep = temp;
-    }
-    
-    // Extract the exponent and significand from the (possibly swapped) a and b.
-    int aExponent = aRep >> significandBits & maxExponent;
-    int bExponent = bRep >> significandBits & maxExponent;
-    rep_t aSignificand = aRep & significandMask;
-    rep_t bSignificand = bRep & significandMask;
-    
-    // Normalize any denormals, and adjust the exponent accordingly.
-    if (aExponent == 0) aExponent = normalize(&aSignificand);
-    if (bExponent == 0) bExponent = normalize(&bSignificand);
-    
-    // The sign of the result is the sign of the larger operand, a.  If they
-    // have opposite signs, we are performing a subtraction; otherwise addition.
-    const rep_t resultSign = aRep & signBit;
-    const bool subtraction = (aRep ^ bRep) & signBit;
-    
-    // Shift the significands to give us round, guard and sticky, and or in the
-    // implicit significand bit.  (If we fell through from the denormal path it
-    // was already set by normalize( ), but setting it twice won't hurt
-    // anything.)
-    aSignificand = (aSignificand | implicitBit) << 3;
-    bSignificand = (bSignificand | implicitBit) << 3;
-    
-    // Shift the significand of b by the difference in exponents, with a sticky
-    // bottom bit to get rounding correct.
-    const unsigned int align = aExponent - bExponent;
-    if (align) {
-        if (align < typeWidth) {
-            const bool sticky = bSignificand << (typeWidth - align);
-            bSignificand = bSignificand >> align | sticky;
-        } else {
-            bSignificand = 1; // sticky; b is known to be non-zero.
-        }
-    }
-    
-    if (subtraction) {
-        aSignificand -= bSignificand;
-        
-        // If a == -b, return +zero.
-        if (aSignificand == 0) return fromRep(0);
-        
-        // If partial cancellation occured, we need to left-shift the result
-        // and adjust the exponent:
-        if (aSignificand < implicitBit << 3) {
-            const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
-            aSignificand <<= shift;
-            aExponent -= shift;
-        }
-    }
-    
-    else /* addition */ {
-        aSignificand += bSignificand;
-        
-        // If the addition carried up, we need to right-shift the result and
-        // adjust the exponent:
-        if (aSignificand & implicitBit << 4) {
-            const bool sticky = aSignificand & 1;
-            aSignificand = aSignificand >> 1 | sticky;
-            aExponent += 1;
-        }
-    }
-    
-    // If we have overflowed the type, return +/- infinity:
-    if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
-    
-    if (aExponent <= 0) {
-        // Result is denormal before rounding; the exponent is zero and we
-        // need to shift the significand.
-        const int shift = 1 - aExponent;
-        const bool sticky = aSignificand << (typeWidth - shift);
-        aSignificand = aSignificand >> shift | sticky;
-        aExponent = 0;
-    }
-    
-    // Low three bits are round, guard, and sticky.
-    const int roundGuardSticky = aSignificand & 0x7;
-    
-    // Shift the significand into place, and mask off the implicit bit.
-    rep_t result = aSignificand >> 3 & significandMask;
-    
-    // Insert the exponent and sign.
-    result |= (rep_t)aExponent << significandBits;
-    result |= resultSign;
-    
-    // Final rounding.  The result may overflow to infinity, but that is the
-    // correct result in that case.
-    if (roundGuardSticky > 0x4) result++;
-    if (roundGuardSticky == 0x4) result += result & 1;
-    return fromRep(result);
+double __adddf3(double a, double b){
+    return __addXf3__(a, b);
 }
Index: lib/builtins/addsf3.c
===================================================================
--- lib/builtins/addsf3.c
+++ lib/builtins/addsf3.c
@@ -13,139 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define SINGLE_PRECISION
-#include "fp_lib.h"
+#include "fp_add_impl.inc"
 
 ARM_EABI_FNALIAS(fadd, addsf3)
 
-fp_t __addsf3(fp_t a, fp_t b) {
-
-    rep_t aRep = toRep(a);
-    rep_t bRep = toRep(b);
-    const rep_t aAbs = aRep & absMask;
-    const rep_t bAbs = bRep & absMask;
-    
-    // Detect if a or b is zero, infinity, or NaN.
-    if (aAbs - 1U >= infRep - 1U || bAbs - 1U >= infRep - 1U) {
-        
-        // NaN + anything = qNaN
-        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
-        // anything + NaN = qNaN
-        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-        
-        if (aAbs == infRep) {
-            // +/-infinity + -/+infinity = qNaN
-            if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
-            // +/-infinity + anything remaining = +/- infinity
-            else return a;
-        }
-        
-        // anything remaining + +/-infinity = +/-infinity
-        if (bAbs == infRep) return b;
-        
-        // zero + anything = anything
-        if (!aAbs) {
-            // but we need to get the sign right for zero + zero
-            if (!bAbs) return fromRep(toRep(a) & toRep(b));
-            else return b;
-        }
-        
-        // anything + zero = anything
-        if (!bAbs) return a;
-    }
-    
-    // Swap a and b if necessary so that a has the larger absolute value.
-    if (bAbs > aAbs) {
-        const rep_t temp = aRep;
-        aRep = bRep;
-        bRep = temp;
-    }
-    
-    // Extract the exponent and significand from the (possibly swapped) a and b.
-    int aExponent = aRep >> significandBits & maxExponent;
-    int bExponent = bRep >> significandBits & maxExponent;
-    rep_t aSignificand = aRep & significandMask;
-    rep_t bSignificand = bRep & significandMask;
-    
-    // Normalize any denormals, and adjust the exponent accordingly.
-    if (aExponent == 0) aExponent = normalize(&aSignificand);
-    if (bExponent == 0) bExponent = normalize(&bSignificand);
-    
-    // The sign of the result is the sign of the larger operand, a.  If they
-    // have opposite signs, we are performing a subtraction; otherwise addition.
-    const rep_t resultSign = aRep & signBit;
-    const bool subtraction = (aRep ^ bRep) & signBit;
-    
-    // Shift the significands to give us round, guard and sticky, and or in the
-    // implicit significand bit.  (If we fell through from the denormal path it
-    // was already set by normalize( ), but setting it twice won't hurt
-    // anything.)
-    aSignificand = (aSignificand | implicitBit) << 3;
-    bSignificand = (bSignificand | implicitBit) << 3;
-    
-    // Shift the significand of b by the difference in exponents, with a sticky
-    // bottom bit to get rounding correct.
-    const unsigned int align = aExponent - bExponent;
-    if (align) {
-        if (align < typeWidth) {
-            const bool sticky = bSignificand << (typeWidth - align);
-            bSignificand = bSignificand >> align | sticky;
-        } else {
-            bSignificand = 1; // sticky; b is known to be non-zero.
-        }
-    }
-    
-    if (subtraction) {
-        aSignificand -= bSignificand;
-        
-        // If a == -b, return +zero.
-        if (aSignificand == 0) return fromRep(0);
-        
-        // If partial cancellation occured, we need to left-shift the result
-        // and adjust the exponent:
-        if (aSignificand < implicitBit << 3) {
-            const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
-            aSignificand <<= shift;
-            aExponent -= shift;
-        }
-    }
-    
-    else /* addition */ {
-        aSignificand += bSignificand;
-        
-        // If the addition carried up, we need to right-shift the result and
-        // adjust the exponent:
-        if (aSignificand & implicitBit << 4) {
-            const bool sticky = aSignificand & 1;
-            aSignificand = aSignificand >> 1 | sticky;
-            aExponent += 1;
-        }
-    }
-    
-    // If we have overflowed the type, return +/- infinity:
-    if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
-    
-    if (aExponent <= 0) {
-        // Result is denormal before rounding; the exponent is zero and we
-        // need to shift the significand.
-        const int shift = 1 - aExponent;
-        const bool sticky = aSignificand << (typeWidth - shift);
-        aSignificand = aSignificand >> shift | sticky;
-        aExponent = 0;
-    }
-    
-    // Low three bits are round, guard, and sticky.
-    const int roundGuardSticky = aSignificand & 0x7;
-    
-    // Shift the significand into place, and mask off the implicit bit.
-    rep_t result = aSignificand >> 3 & significandMask;
-    
-    // Insert the exponent and sign.
-    result |= (rep_t)aExponent << significandBits;
-    result |= resultSign;
-    
-    // Final rounding.  The result may overflow to infinity, but that is the
-    // correct result in that case.
-    if (roundGuardSticky > 0x4) result++;
-    if (roundGuardSticky == 0x4) result += result & 1;
-    return fromRep(result);
+float __addsf3(float a, float b) {
+    return __addXf3__(a, b);
 }
Index: lib/builtins/addtf3.c
===================================================================
--- /dev/null
+++ lib/builtins/addtf3.c
@@ -0,0 +1,25 @@
+//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#ifdef CRT_HAS_128BIT
+
+#define QUAD_PRECISION
+#include "fp_add_impl.inc"
+
+long double __addtf3(long double a, long double b){
+    return __addXf3__(a, b);
+}
+
+#endif
Index: lib/builtins/fp_add_impl.inc
===================================================================
--- /dev/null
+++ lib/builtins/fp_add_impl.inc
@@ -0,0 +1,144 @@
+//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float addition with the IEEE-754 default rounding
+// (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+COMPILER_RT_ABI static inline fp_t __addXf3__(fp_t a, fp_t b) {
+    rep_t aRep = toRep(a);
+    rep_t bRep = toRep(b);
+    const rep_t aAbs = aRep & absMask;
+    const rep_t bAbs = bRep & absMask;
+
+    /* Detect if a or b is zero, infinity, or NaN.*/
+    if (aAbs - REP_C(1) >= infRep - REP_C(1) ||
+        bAbs - REP_C(1) >= infRep - REP_C(1)) {
+        /* NaN + anything = qNaN*/
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        /* anything + NaN = qNaN*/
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+        if (aAbs == infRep) {
+            /* +/-infinity + -/+infinity = qNaN*/
+            if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
+            /* +/-infinity + anything remaining = +/- infinity*/
+            else return a;
+        }
+
+        /* anything remaining + +/-infinity = +/-infinity*/
+        if (bAbs == infRep) return b;
+
+        /* zero + anything = anything*/
+        if (!aAbs) {
+            /* but we need to get the sign right for zero + zero*/
+            if (!bAbs) return fromRep(toRep(a) & toRep(b));
+            else return b;
+        }
+
+        /* anything + zero = anything*/
+        if (!bAbs) return a;
+    }
+
+    /* Swap a and b if necessary so that a has the larger absolute value.*/
+    if (bAbs > aAbs) {
+        const rep_t temp = aRep;
+        aRep = bRep;
+        bRep = temp;
+    }
+
+    /* Extract the exponent and significand from the (possibly swapped) a and b.*/
+    int aExponent = aRep >> significandBits & maxExponent;
+    int bExponent = bRep >> significandBits & maxExponent;
+    rep_t aSignificand = aRep & significandMask;
+    rep_t bSignificand = bRep & significandMask;
+
+    /* Normalize any denormals, and adjust the exponent accordingly.*/
+    if (aExponent == 0) aExponent = normalize(&aSignificand);
+    if (bExponent == 0) bExponent = normalize(&bSignificand);
+
+    /* The sign of the result is the sign of the larger operand, a.  If they
+       have opposite signs, we are performing a subtraction; otherwise addition.*/
+    const rep_t resultSign = aRep & signBit;
+    const bool subtraction = (aRep ^ bRep) & signBit;
+
+    /* Shift the significands to give us round, guard and sticky, and or in the
+       implicit significand bit.  (If we fell through from the denormal path it
+       was already set by normalize( ), but setting it twice won't hurt
+      anything.) */
+    aSignificand = (aSignificand | implicitBit) << 3;
+    bSignificand = (bSignificand | implicitBit) << 3;
+
+    /* Shift the significand of b by the difference in exponents, with a sticky
+       bottom bit to get rounding correct.*/
+    const unsigned int align = aExponent - bExponent;
+    if (align) {
+        if (align < typeWidth) {
+            const bool sticky = bSignificand << (typeWidth - align);
+            bSignificand = bSignificand >> align | sticky;
+        } else {
+            bSignificand = 1; /* sticky; b is known to be non-zero.*/
+        }
+    }
+    if (subtraction) {
+        aSignificand -= bSignificand;
+        /* If a == -b, return +zero.*/
+        if (aSignificand == 0) return fromRep(0);
+
+        /* If partial cancellation occured, we need to left-shift the result
+           and adjust the exponent:*/
+        if (aSignificand < implicitBit << 3) {
+            const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+            aSignificand <<= shift;
+            aExponent -= shift;
+        }
+    }
+    else /* addition */ {
+        aSignificand += bSignificand;
+
+        /* If the addition carried up, we need to right-shift the result and
+           adjust the exponent:*/
+        if (aSignificand & implicitBit << 4) {
+            const bool sticky = aSignificand & 1;
+            aSignificand = aSignificand >> 1 | sticky;
+            aExponent += 1;
+        }
+    }
+
+    /* If we have overflowed the type, return +/- infinity:*/
+    if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
+
+    if (aExponent <= 0) {
+        /* Result is denormal before rounding; the exponent is zero and we
+           need to shift the significand.*/
+        const int shift = 1 - aExponent;
+        const bool sticky = aSignificand << (typeWidth - shift);
+        aSignificand = aSignificand >> shift | sticky;
+        aExponent = 0;
+    }
+
+    /* Low three bits are round, guard, and sticky.*/
+    const int roundGuardSticky = aSignificand & 0x7;
+
+    /* Shift the significand into place, and mask off the implicit bit.*/
+    rep_t result = aSignificand >> 3 & significandMask;
+
+    /* Insert the exponent and sign.*/
+    result |= (rep_t)aExponent << significandBits;
+    result |= resultSign;
+
+    /* Final rounding.  The result may overflow to infinity, but that is the
+       correct result in that case.*/
+    if (roundGuardSticky > 0x4) result++;
+    if (roundGuardSticky == 0x4) result += result & 1;
+    return fromRep(result);
+}
Index: lib/builtins/subtf3.c
===================================================================
--- /dev/null
+++ lib/builtins/subtf3.c
@@ -0,0 +1,29 @@
+//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#ifdef CRT_HAS_128BIT
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subtf3(fp_t a, fp_t b) {
+    return __addtf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+#endif
Index: test/builtins/Unit/addtf3_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/addtf3_test.c
@@ -0,0 +1,116 @@
+//===--------------- addtf3_test.c - Test __addtf3 ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __addtf3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LP64__
+
+#include <stdlib.h>
+#include <limits.h>
+
+static inline long double fromRep(uint64_t hi, uint64_t lo)
+{
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    const union {long double f; __uint128_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline __uint128_t toRep(long double x)
+{
+    const union {long double f; __uint128_t i;} rep = {.f = x};
+    return rep.i;
+}
+
+// return 0 if equal
+// use two 64-bit integers intead of one 128-bit integer
+// because 128-bit integer constant can't be assigned directly
+static inline int compareResult(long double result,
+                                uint64_t expectedHi,
+                                uint64_t expectedLo)
+{
+    __uint128_t rep = toRep(result);
+    uint64_t hi = rep >> 64;
+    uint64_t lo = rep;
+
+    if (hi == expectedHi && lo == expectedLo){
+        return 0;
+    }
+    // test other posible NaN representation(signal NaN)
+    else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){
+        if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL &&
+            ((hi & 0xffffffffffffUL) > 0 || lo > 0)){
+            return 0;
+        }
+    }
+    return 1;
+}
+
+// Returns: a + b
+long double __addtf3(long double a, long double b);
+
+int test__addtf3(long double a, long double b,
+                 uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __addtf3(a, b);
+    int ret = compareResult(x, expectedHi, expectedLo);
+
+    if (ret){
+        printf("error in test__addtf3(%.20Lf, %.20Lf) = %.20Lf, "
+               "expected %.20Lf\n", a, b, x, fromRep(expectedHi, expectedLo));
+    }
+
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LP64__
+    // qNaN + any = qNaN
+    if (test__addtf3(fromRep(0x7fff800000000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff800000000000UL,
+                     0x0UL))
+        return 1;
+    // NaN + any = NaN
+    if (test__addtf3(fromRep(0x7fff800030000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff800000000000UL,
+                     0x0UL))
+        return 1;
+    // inf + inf = inf
+    if (test__addtf3(fromRep(0x7fff000000000000UL, 0x0),
+                     fromRep(0x7fff000000000000UL, 0x0),
+                     0x7fff000000000000UL, 0x0UL))
+        return 1;
+    // inf + any = inf
+    if (test__addtf3(fromRep(0x7fff000000000000UL, 0x0),
+                     0x1.2335653452436234723489432abcdefp+5L,
+                     0x7fff000000000000UL, 0x0UL))
+        return 1;
+    // any + any
+    if (test__addtf3(0x1.23456734245345543849abcdefp+5L,
+                     0x1.edcba52449872455634654321fp-1L,
+                     0x40042afc95c8b579UL,
+                     0x61e58dd6c51eb77cUL))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
Index: test/builtins/Unit/subtf3_test.c
===================================================================
--- /dev/null
+++ test/builtins/Unit/subtf3_test.c
@@ -0,0 +1,110 @@
+//===--------------- subtf3_test.c - Test __subtf3 ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __subtf3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LP64__
+
+#include <stdlib.h>
+#include <limits.h>
+
+static inline long double fromRep(uint64_t hi, uint64_t lo)
+{
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    const union {long double f; __uint128_t i; } rep = {.i = x};
+    return rep.f;
+}
+
+static inline __uint128_t toRep(long double x)
+{
+    const union {long double f; __uint128_t i;} rep = {.f = x};
+    return rep.i;
+}
+
+// return 0 if equal
+// use two 64-bit integers intead of one 128-bit integer
+// because 128-bit integer constant can't be assigned directly
+static inline int compareResult(long double result,
+                                uint64_t expectedHi,
+                                uint64_t expectedLo)
+{
+    __uint128_t rep = toRep(result);
+    uint64_t hi = rep >> 64;
+    uint64_t lo = rep;
+
+    if (hi == expectedHi && lo == expectedLo){
+        return 0;
+    }
+    // test other posible NaN representation(signal NaN)
+    else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){
+        if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL &&
+            ((hi & 0xffffffffffff) > 0 || lo > 0)){
+            return 0;
+        }
+    }
+    return 1;
+}
+
+// Returns: a - b
+long double __subtf3(long double a, long double b);
+
+int test__subtf3(long double a, long double b,
+                 uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __subtf3(a, b);
+    int ret = compareResult(x, expectedHi, expectedLo);
+
+    if (ret){
+        printf("error in test__subtf3(%.20Lf, %.20Lf) = %.20Lf, "
+               "expected %.20Lf\n", a, b, x, fromRep(expectedHi, expectedLo));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LP64__
+    // qNaN - any = qNaN
+    if (test__subtf3(fromRep(0x7fff800000000000UL, 0x0),
+                      0x1.23456789abcdefp+5L,
+                      0x7fff800000000000UL,
+                      0x0UL))
+        return 1;
+    // NaN - any = NaN
+    if (test__subtf3(fromRep(0x7fff800030000000UL, 0x0),
+                      0x1.23456789abcdefp+5L,
+                      0x7fff800000000000UL,
+                      0x0UL))
+        return 1;
+    // inf - any = inf
+    if (test__subtf3(fromRep(0x7fff000000000000UL, 0x0),
+                     0x1.23456789abcdefp+5L,
+                     0x7fff000000000000UL,0x0UL))
+        return 1;
+    // any - any
+    if (test__subtf3(0x1.234567829a3bcdef5678ade36734p+5L,
+                     0x1.ee9d7c52354a6936ab8d7654321fp-1L,
+                     0x40041b8af1915166UL,
+                     0xa44a7bca780a166cUL))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}