Index: src/cxa_exception.cpp
===================================================================
--- src/cxa_exception.cpp
+++ src/cxa_exception.cpp
@@ -15,8 +15,10 @@
 #include "cxxabi.h"
 
 #include <exception>        // for std::terminate
+#include <cstddef>          // for std::max_align_t
 #include <cstdlib>          // for malloc, free
 #include <cstring>          // for memset
+#include <cassert>          // for fallback_malloc.ipp
 #if !LIBCXXABI_HAS_NO_THREADS
 #  include <pthread.h>      // for fallback_malloc.ipp's mutexes
 #endif
@@ -106,6 +108,14 @@
 
 #include "fallback_malloc.ipp"
 
+static_assert(alignof(FallbackMaxAlignType) == alignof(__cxa_exception),
+    "The alignment of FallbackMaxAlignType should match the alignment of "
+    "__cxa_exception");
+
+static_assert(alignof(FallbackMaxAlignType) >= alignof(std::max_align_t),
+    "FallbackMaxAlignType must have an alignment requirement greater or equal "
+    "to the alignment of std::max_align_t");
+
 //  Allocate some memory from _somewhere_
 static void *do_malloc(size_t size) {
     void *ptr = std::malloc(size);
Index: src/fallback_malloc.ipp
===================================================================
--- src/fallback_malloc.ipp
+++ src/fallback_malloc.ipp
@@ -11,7 +11,18 @@
 //  
 //===----------------------------------------------------------------------===//
 
-#include "config.h"
+// fallback_malloc.ipp cannot include any headers itself, but it depends on
+// declarations from several. These checks help ensure that the translation
+// unit including this file has already included the proper headers.
+#ifndef LIBCXXABI_CONFIG_H
+#error config.h needs to be included before this file!
+#endif
+#ifndef assert
+#error <cassert> needs to be included before this file!
+#endif
+#ifndef _ALIGNAS_TYPE
+#error The required macro '_ALIGNAS_TYPE' is not defined.
+#endif
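+// A translation unit including this file is expected to set things up roughly
+// the way cxa_exception.cpp does:
+//   #include "config.h"
+//   #include <cassert>
+//   #include "fallback_malloc.ipp"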
 
 //  A small, simple heap manager based (loosely) on 
 //  the startup heap manager from FreeBSD, optimized for space.
@@ -47,41 +58,77 @@
 #if !LIBCXXABI_HAS_NO_THREADS
     pthread_mutex_t *mtx_;
 #endif
-    };
+};
 
-        
-#define HEAP_SIZE   512
-char heap [ HEAP_SIZE ];
 
 typedef unsigned short heap_offset;
 typedef unsigned short heap_size;
 
+// On both 64 and 32 bit targets heap_node should have the following properties:
+// Size: 4
+// Alignment: 2
 struct heap_node {
     heap_offset next_node;  // offset into heap
     heap_size   len;        // size in units of "sizeof(heap_node)"
 };
 
-static const heap_node *list_end = (heap_node *) ( &heap [ HEAP_SIZE ] );   // one past the end of the heap
+
+// All pointers returned by fallback_malloc must be aligned to at least
+// RequiredAlignment. Note that RequiredAlignment can be greater than
+// alignof(std::max_align_t) on 64 bit systems compiling 32 bit code.
+struct FallbackMaxAlignType {} __attribute__((aligned));
+const size_t RequiredAlignment = alignof(FallbackMaxAlignType);
+
+static_assert(alignof(FallbackMaxAlignType) % sizeof(heap_node) == 0,
+    "The required alignment must be evenly divisible by the sizeof(heap_node)");
+
+// The number of heap_nodes that fit in a chunk of memory the size of
+// RequiredAlignment.
+const size_t NodesPerAlignment = alignof(FallbackMaxAlignType) / sizeof(heap_node);
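+// For example, on a typical 64 bit target where RequiredAlignment == 16 and
+// sizeof(heap_node) == 4, NodesPerAlignment == 16 / 4 == 4.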
+
+// The heap is aligned to RequiredAlignment so that getFirstAlignedNodeInHeap
+// can always find a suitably aligned node within it.
+char heap _ALIGNAS_TYPE(FallbackMaxAlignType) [512];
+const size_t HeapSize = sizeof(heap) / sizeof(heap_node); // in heap_node units
+
+static const heap_node *list_end = ((heap_node *)heap) + HeapSize; // one past the end of the heap
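+// Assuming sizeof(heap_node) == 4 (as noted above), a 512 byte heap gives
+// HeapSize == 128 nodes, with list_end pointing at heap + 512 bytes.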
 static heap_node *freelist = NULL;
 
-heap_node *node_from_offset ( const heap_offset offset )
-    { return (heap_node *) ( heap + ( offset * sizeof (heap_node))); }
+heap_node *node_from_offset ( const heap_offset offset ) {
+  return (heap_node *) ( heap + ( offset * sizeof (heap_node)));
+}
+
+heap_offset offset_from_node ( const heap_node *ptr ) {
+  size_t ByteOffset = static_cast<size_t>(reinterpret_cast<const char*>(ptr) - heap);
+  return static_cast<heap_offset>(ByteOffset / sizeof(heap_node));
+}
+
+bool is_fallback_ptr(void *ptr) {
+  // Compare against the byte size of the heap; HeapSize is in heap_node units.
+  return ptr >= heap && ptr < (heap + sizeof(heap));
+}
+
+// Return a pointer to the first address, 'A', in `heap` that can actually be
+// used to represent a heap_node. 'A' must be aligned so that
+// '(A + sizeof(heap_node)) % RequiredAlignment == 0'. On 64 bit systems this
+// address should be 12 bytes after the first 16 byte boundary.
+heap_node* getFirstAlignedNodeInHeap() {
+  heap_node* node = (heap_node*) heap;
+  const size_t alignNBytesAfterBoundary = RequiredAlignment - sizeof(heap_node);
+  size_t boundaryOffset = reinterpret_cast<size_t>(node) % RequiredAlignment;
+  size_t requiredOffset = alignNBytesAfterBoundary - boundaryOffset;
+  size_t NElemOffset = requiredOffset / sizeof(heap_node);
+  return node + NElemOffset;
+}
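+// A worked example, assuming a 64 bit target (RequiredAlignment == 16,
+// sizeof(heap_node) == 4) where `heap` itself starts on a 16 byte boundary:
+// boundaryOffset == 0, requiredOffset == 16 - 4 == 12, NElemOffset == 3, so
+// the first usable node sits 12 bytes into the heap and the storage after it
+// (node + 1) lands exactly on the next 16 byte boundary.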
 
-heap_offset offset_from_node ( const heap_node *ptr )
-    { return static_cast<heap_offset>(static_cast<size_t>(reinterpret_cast<const char *>(ptr) - heap)  / sizeof (heap_node)); }
- 
 void init_heap () {
-    freelist = (heap_node *) heap;
+    freelist = getFirstAlignedNodeInHeap();
     freelist->next_node = offset_from_node ( list_end );
-    freelist->len = HEAP_SIZE / sizeof (heap_node);
-    }
+    freelist->len = static_cast<heap_size>(list_end - freelist);
+}
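+// Continuing the example above: freelist starts 3 nodes into the heap, so the
+// initial free chunk spans HeapSize - 3 == 125 nodes.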
     
 //  How big a chunk we allocate
 size_t alloc_size (size_t len)
     { return (len + sizeof(heap_node) - 1) / sizeof(heap_node) + 1; }
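+// For example, with sizeof(heap_node) == 4, alloc_size(10) == (10 + 3) / 4 + 1
+// == 4 nodes: 3 nodes (12 bytes) for the user data plus 1 node of header.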
 
-bool is_fallback_ptr ( void *ptr )
-    { return ptr >= heap && ptr < ( heap + HEAP_SIZE ); }
+
 
 void *fallback_malloc(size_t len) {
     heap_node *p, *prev;
@@ -91,27 +138,48 @@
     if ( NULL == freelist )
         init_heap ();
 
-//  Walk the free list, looking for a "big enough" chunk
-    for (p = freelist, prev = 0; 
-            p && p != list_end;     prev = p, p = node_from_offset ( p->next_node)) {
+    //  Walk the free list, looking for a "big enough" chunk
+    for (p = freelist, prev = 0; p && p != list_end;
+         prev = p, p = node_from_offset ( p->next_node)) {
 
-        if (p->len > nelems) {  //  chunk is larger, shorten, and return the tail
-            heap_node *q;
+        // Check the invariant that all heap_nodes pointers 'p' are aligned
+        // so that 'p + 1' has an alignment of at least RequiredAlignment
+        assert(reinterpret_cast<size_t>(p + 1) % RequiredAlignment == 0);
 
-            p->len = static_cast<heap_size>(p->len - nelems);
+        // Calculate the number of extra padding elements needed in order
+        // to split 'p' and create a properly aligned heap_node from the tail
+        // of 'p'. We calculate aligned_nelems such that 'p->len - aligned_nelems'
+        // will be a multiple of NodesPerAlignment.
+        size_t aligned_nelems = nelems;
+        if (p->len > nelems) {
+          heap_size remaining_len = static_cast<heap_size>(p->len - nelems);
+          aligned_nelems += remaining_len % NodesPerAlignment;
+        }
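+        // For example, with NodesPerAlignment == 4, p->len == 10 and
+        // nelems == 5: remaining_len == 5, aligned_nelems == 5 + 5 % 4 == 6,
+        // and the shortened 'p' keeps 10 - 6 == 4 nodes, so the tail node 'q'
+        // below still satisfies the '(q + 1) % RequiredAlignment' invariant.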
+
+        // The chunk is larger and we can create a properly aligned
+        // heap_node from the tail. In this case we shorten 'p' and
+        // return the tail.
+        if (p->len > aligned_nelems) {
+            heap_node *q;
+            p->len = static_cast<heap_size>(p->len - aligned_nelems);
             q = p + p->len;
             q->next_node = 0;
-            q->len = static_cast<heap_size>(nelems);
-            return (void *) (q + 1);
+            q->len = static_cast<heap_size>(aligned_nelems);
+            void* ptr = q + 1;
+            assert(reinterpret_cast<size_t>(ptr) % RequiredAlignment == 0);
+            return ptr;
         }
-        
-        if (p->len == nelems) { // exact size match
+        // The chunk is either an exact fit, or it is larger but too small
+        // to split while keeping the tail properly aligned.
+        else if (p->len >= nelems) {
             if (prev == 0)
                 freelist = node_from_offset(p->next_node);
             else
                 prev->next_node = p->next_node;
             p->next_node = 0;
-            return (void *) (p + 1);
+            void* ptr = p + 1;
+            assert(reinterpret_cast<size_t>(ptr) % RequiredAlignment == 0);
+            return ptr;
         }
     }
     return NULL;    // couldn't find a spot big enough
Index: test/test_fallback_malloc.pass.cpp
===================================================================
--- test/test_fallback_malloc.pass.cpp
+++ test/test_fallback_malloc.pass.cpp
@@ -9,21 +9,30 @@
 
 #include <iostream>
 #include <deque>
+#include <cassert>
 
 #include <pthread.h>
 
-typedef std::deque<void *> container;
-
 // #define  DEBUG_FALLBACK_MALLOC
 #define INSTRUMENT_FALLBACK_MALLOC
+#include "../src/config.h"
 #include "../src/fallback_malloc.ipp"
+#include "../src/cxa_exception.hpp"
+
+typedef std::deque<void *> container;
+
+void assertAlignment(void* ptr) {
+    assert(reinterpret_cast<size_t>(ptr) % alignof(FallbackMaxAlignType) == 0);
+}
 
 container alloc_series ( size_t sz ) {
     container ptrs;
     void *p;
     
-    while ( NULL != ( p = fallback_malloc ( sz )))
+    while ( NULL != ( p = fallback_malloc ( sz ))) {
+        assertAlignment(p);
         ptrs.push_back ( p );
+    }
     return ptrs;
     }
 
@@ -32,6 +41,7 @@
     void *p;
     
     while ( NULL != ( p = fallback_malloc ( sz ))) {
+        assertAlignment(p);
         ptrs.push_back ( p );
         sz *= growth;
         }
@@ -47,6 +57,7 @@
     for ( const size_t *iter = first; iter != last; ++iter ) {
         if ( NULL == (p = fallback_malloc ( *iter )))
             break;
+        assertAlignment(p);
         ptrs.push_back ( p );
         }