Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -995,7 +995,49 @@
   return true;
 }
 
+/// Hack to find LValues that represent union members, so that callers can
+/// remove their TBAA information. The TBAA in the current form does not work
+/// for union members: the aliasing information emitted in such cases may be
+/// incorrect (leading to incorrect optimizations).
+/// Looks through parentheses, implicit casts, array subscripts and member
+/// accesses (both '.' and '->'); the answer for \p E is memoized in \p Cache.
+static bool isUnionAccess(llvm::DenseMap<const Expr*, bool> &Cache,
+                          const Expr *E) {
+  auto F = Cache.find(E);
+  if (F != Cache.end())
+    return F->second;
+
+  const Expr *Sub = nullptr; // Operand that decides the answer, if any.
+  bool Result = false;
+  if (const auto *ME = dyn_cast<MemberExpr>(E)) {
+    Sub = ME->getBase();
+    QualType BaseTy = Sub->getType();
+    if (ME->isArrow()) // 'p->m': classify the pointee, not the pointer.
+      BaseTy = BaseTy->getPointeeType();
+    Result = !BaseTy.isNull() && BaseTy->isUnionType();
+  } else if (const auto *PE = dyn_cast<ParenExpr>(E)) {
+    Sub = PE->getSubExpr();
+  } else if (const auto *ICE = dyn_cast<ImplicitCastExpr>(E)) {
+    Sub = ICE->getSubExpr();
+  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+    Sub = ASE->getBase();
+  }
+  if (!Result && Sub)
+    Result = isUnionAccess(Cache, Sub);
+  // Insert only now: the recursive call may have grown the map.
+  Cache.insert({E, Result});
+  return Result;
+}
+
 LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) {
+  LValue Result = EmitCheckedLValueImpl(E, TCK);
+  if (CGM.shouldUseTBAA() && isUnionAccess(UnionAccesses, E))
+    Result.setTBAAInfo(nullptr); // TBAA cannot describe union members yet.
+  return Result;
+}
+
+LValue CodeGenFunction::EmitCheckedLValueImpl(const Expr *E,
+                                              TypeCheckKind TCK) {
   LValue LV;
   if (SanOpts.has(SanitizerKind::ArrayBounds) && isa<ArraySubscriptExpr>(E))
     LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true);
@@ -1031,7 +1073,15 @@
 /// type of the same size of the lvalue's type.  If the lvalue has a variable
 /// length type, this is not possible.
 ///
+
 LValue CodeGenFunction::EmitLValue(const Expr *E) {
+  LValue Result = EmitLValueImpl(E);
+  if (CGM.shouldUseTBAA() && isUnionAccess(UnionAccesses, E))
+    Result.setTBAAInfo(nullptr); // TBAA cannot describe union members yet.
+  return Result;
+}
+
+LValue CodeGenFunction::EmitLValueImpl(const Expr *E) {
   ApplyDebugLocation DL(*this, E);
   switch (E->getStmtClass()) {
   default: return EmitUnsupportedLValue(E, "l-value expression");
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2885,6 +2885,21 @@
   /// that the address will be used to access the object.
   LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK);
 
+private:
+  /// The actual implementations of LValue emission. As a workaround for
+  /// a problem in representing union member accesses in TBAA, the public
+  /// wrappers EmitLValue and EmitCheckedLValue call these and then remove
+  /// the TBAA information from any LValue generated for such an access.
+  /// When the TBAA issue is fixed, the public wrappers should be replaced
+  /// with these functions.
+  LValue EmitLValueImpl(const Expr *E);
+  LValue EmitCheckedLValueImpl(const Expr *E, TypeCheckKind TCK);
+
+  /// Cached results of previous checks for union access, keyed by
+  /// expression. This is reset at the beginning of GenerateCode.
+  llvm::DenseMap<const Expr*, bool> UnionAccesses;
+
+public:
   RValue convertTempToRValue(Address addr, QualType type,
                              SourceLocation Loc);
 
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -1103,6 +1103,9 @@
 
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
+  // Clear the cache of known union accesses.
+  UnionAccesses.clear();
+
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
   CurGD = GD;
 
Index: test/CodeGen/union-tbaa1.c
===================================================================
--- /dev/null
+++ test/CodeGen/union-tbaa1.c
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 %s -triple hexagon-unknown-elf -O2 -emit-llvm -o - | FileCheck %s
+
+typedef union __attribute__((aligned(4))) {
+  unsigned short uh[2]; // halfword view; shares storage with uw
+  unsigned uw;          // word view
+} vect32;
+
+void bar(vect32 p[][2]);
+
+// CHECK-LABEL: define void @fred
+void fred(unsigned Num, int Vec[2], int *Index, int Arr[4][2]) {
+  vect32 Tmp[4][2];
+// The load of *Index is a plain int access and keeps its tbaa tag:
+// CHECK: load i32, i32* %Index{{.*}}tbaa
+// But the two stores to the union member 'uw' must carry no tbaa:
+// CHECK: %uw[[UW1:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW1]]
+// CHECK-NOT: tbaa
+// There will be a load after the store, and it will use tbaa. Make sure
+// the check-not above doesn't find it:
+// CHECK: load
+  Tmp[*Index][0].uw = Arr[*Index][0] * Num;
+// CHECK: %uw[[UW2:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW2]]
+// CHECK-NOT: tbaa
+  Tmp[*Index][1].uw = Arr[*Index][1] * Num;
+// Same for the loads from the union member 'uh': they must carry no tbaa:
+// CHECK: %uh[[UH1:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX1:[0-9]*]] = getelementptr{{.*}}%uh[[UH1]]
+// CHECK: load i16, i16* %arrayidx[[AX1]]
+// CHECK-NOT: tbaa
+// CHECK: store
+  Vec[0] = Tmp[*Index][0].uh[1];
+// CHECK: %uh[[UH2:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX2:[0-9]*]] = getelementptr{{.*}}%uh[[UH2]]
+// CHECK: load i16, i16* %arrayidx[[AX2]]
+// CHECK-NOT: tbaa
+// CHECK: store
+  Vec[1] = Tmp[*Index][1].uh[1];
+  bar(Tmp); // presumably keeps the stores to Tmp from being optimized away
+}
+
Index: test/CodeGenCXX/union-tbaa2.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/union-tbaa2.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 %s -O2 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse4.2 -target-feature +avx -emit-llvm -o - | FileCheck %s
+
+// Testcase from llvm.org/PR32056
+
+extern "C" int printf (const char *__restrict __format, ...);
+
+typedef double __m256d __attribute__((__vector_size__(32)));
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__,
+                                       __target__("avx")))
+_mm256_setr_pd(double __a, double __b, double __c, double __d) {
+  return (__m256d){ __a, __b, __c, __d }; // elements in argument order
+}
+
+struct A {
+  A () {
+// Check that there is no TBAA information generated for the stores to the
+// union members:
+// CHECK: store <4 x double>
+// CHECK-NOT: tbaa
+// CHECK: store <4 x double>
+// CHECK-NOT: tbaa
+    a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
+    b = _mm256_setr_pd(4.0, 5.0, 6.0, 7.0);
+  }
+
+  const double *begin() { return c; }  // begin()/end() make A usable in
+  const double *end() { return c+8; }  // a range-for over the 8 doubles of c
+
+  union {
+    struct { __m256d a, b; }; // two vectors sharing storage with c
+    double c[8];              // the same bytes viewed as scalars
+  };
+};
+
+int main(int argc, char *argv[]) {
+  A a;
+  for (double value : a) // reads the union through 'c' via begin()/end()
+    printf("%f ", value);
+  return 0;
+}