Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -24,12 +24,14 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -1411,6 +1413,96 @@
                   lvalue.getTBAAOffset(), lvalue.isNontemporal());
 }
 
+// The AAPCS requires bitfield accesses to be performed using the natural
+// alignment / width of the bitfield's container type. This requirement is at
+// odds with how clang models record types. For example, consider:
+//   struct s {
+//     char a;
+//     short b : 8;
+//   };
+// Here, clang treats 'a' and 'b' as two separate (consecutive) storage units,
+// each 8 bits wide. This means an access to the bitfield 'b' will use an i8*
+// instead of an i16* (the type of the container) as required by the AAPCS. In
+// other words, the AAPCS allows the storage of 'a' and 'b' to overlap
+// (7.1.7.4); a load of 'b' loads 'a' as well, which is then masked out as
+// necessary. Modifying clang's abstraction of structures to allow overlapping
+// fields would be quite intrusive, so we have instead settled on the following
+// tweak: we intercept bitfield loads / stores and adjust them so that they are
+// AAPCS compliant. Note that clang and the AAPCS already agree on the layout
+// of structs; it is only the access width / alignment that needs fixing.
+llvm::Value *CodeGenFunction::AdjustAAPCSBitfieldAccess(const LValue LV,
+                                                        CGBitFieldInfo &Info,
+                                                        bool IsLoad) {
+  llvm::Type *ResLTy = IsLoad ? ConvertType(LV.getType()) :
+                                ConvertTypeForMem(LV.getType());
+  llvm::Value *Ptr = LV.getBitFieldAddress().getPointer();
+
+  // Offset (in bits) of the bitfield from the beginning of the struct.
+  uint32_t AbsoluteOffset = getContext().toBits(Info.StorageOffset) +
+                            Info.Offset;
+
+  // The container size is the width of the bitfield's declared type.
+  uint32_t ContainerSize = ResLTy->getPrimitiveSizeInBits();
+
+  // Offset within the container.
+  uint32_t MemberOffset = AbsoluteOffset & (ContainerSize - 1);
+
+  // Bail out if an aligned load of the container cannot cover the entire
+  // bitfield. This can happen, for example, if the bitfield is part of a
+  // packed struct. The AAPCS does not define access rules for such cases, so
+  // we let clang follow its own rules.
+  if (MemberOffset + Info.Size > ContainerSize)
+    return Ptr;
+
+  // Adjust offsets for big-endian targets.
+  if (CGM.getTypes().getDataLayout().isBigEndian())
+    MemberOffset = ContainerSize - (MemberOffset + Info.Size);
+
+  // Get rid of any bitcasts (we are going to change the cast anyway).
+  if (isa<llvm::BitCastOperator>(Ptr))
+    Ptr = Ptr->stripPointerCasts();
+
+  // If the result is a GEP, we need to reset it so that it points to the
+  // beginning of the struct.
+  if (isa<llvm::GEPOperator>(Ptr)) {
+    llvm::User *GEPUser = cast<llvm::User>(Ptr);
+    if (GEPUser->getNumOperands() > 1 &&
+        (AbsoluteOffset > Info.Offset)) {
+      // This means we are indexing into an element in the middle of a record.
+      // So we dissect the original GEP and assemble a new one that indexes
+      // into the beginning of that record.
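+      // As an illustration (cf. struct st2 in test/CodeGen/aapcs-bitfield.c,
+      // i.e. { int a : 10; short c : 7; }): the original GEP for 'c' points
+      // at the i8 storage unit at indices (0, 1). Resetting the last index
+      // to 0 yields a pointer to the start of the record, which the code
+      // below then treats as an array of i16 containers, selecting element
+      // AbsoluteOffset / ContainerSize == 16 / 16 == 1.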
+      llvm::User::op_iterator it = GEPUser->op_begin();
+      // All the indices are given relative to the first operand. We reuse
+      // this operand but overwrite the final index with zero, which gives us
+      // a pointer to the inner-most struct (in the case of nested structs)
+      // holding the bitfield.
+      Ptr = *it++;
+      std::vector<llvm::Value *> IdxVec;
+      while (it != GEPUser->op_end())
+        IdxVec.push_back(*it++);
+      IdxVec.pop_back();
+      IdxVec.push_back(llvm::ConstantInt::get(CGM.IntTy, 0));
+      Ptr = Builder.CreateInBoundsGEP(Ptr,
+                                      llvm::ArrayRef<llvm::Value *>(IdxVec));
+    }
+  }
+
+  // Here we essentially treat the resulting record as an array of containers
+  // and index into it appropriately.
+  Ptr = Builder.CreateBitCast(Ptr, ResLTy->getPointerTo());
+  Ptr = Builder.CreateInBoundsGEP(Ptr,
+      llvm::ConstantInt::get(CGM.IntTy, AbsoluteOffset / ContainerSize));
+
+  // Set the new bitfield access parameters.
+  Info.StorageOffset =
+      getContext().toCharUnitsFromBits(AbsoluteOffset & ~(ContainerSize - 1));
+  Info.StorageSize = ContainerSize;
+  Info.Offset = MemberOffset;
+
+  return Ptr;
+}
+
 /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
 /// method emits the address of the lvalue, then loads the result as an rvalue,
 /// returning the rvalue.
@@ -1461,12 +1553,14 @@
 }
 
 RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
-  const CGBitFieldInfo &Info = LV.getBitFieldInfo();
-
-  // Get the output type.
+  CGBitFieldInfo Info = CGBitFieldInfo(LV.getBitFieldInfo());
   llvm::Type *ResLTy = ConvertType(LV.getType());
-
   Address Ptr = LV.getBitFieldAddress();
+
+  if (CGM.getTarget().getABI().startswith("aapcs"))
+    Ptr = Address(AdjustAAPCSBitfieldAccess(LV, Info),
+                  getContext().getTypeAlignInChars(LV.getType()));
+
   llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load");
 
   if (Info.IsSigned) {
@@ -1658,16 +1752,19 @@
 void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
                                                      llvm::Value **Result) {
-  const CGBitFieldInfo &Info = Dst.getBitFieldInfo();
+  CGBitFieldInfo Info = CGBitFieldInfo(Dst.getBitFieldInfo());
   llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType());
   Address Ptr = Dst.getBitFieldAddress();
 
+  if (CGM.getTarget().getABI().startswith("aapcs"))
+    Ptr = Address(AdjustAAPCSBitfieldAccess(Dst, Info, false),
+                  getContext().getTypeAlignInChars(Dst.getType()));
+
   // Get the source value, truncated to the width of the bit-field.
   llvm::Value *SrcVal = Src.getScalarVal();
-
-  // Cast the source to the storage type and shift it into place.
+  // Cast the source to the access type and shift it into place.
   SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(),
-                                 /*IsSigned=*/false);
+                                 /*IsSigned=*/false);
   llvm::Value *MaskedVal = SrcVal;
 
   // See if there are other bits in the bitfield's storage we'll need to load
Index: lib/CodeGen/CGRecordLayoutBuilder.cpp
===================================================================
--- lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -234,8 +234,11 @@
     // Reverse the bit offsets for big endian machines. Because we represent
     // a bitfield as a single large integer load, we can imagine the bits
     // counting from the most-significant-bit instead of the
-    // least-significant-bit.
-    if (DataLayout.isBigEndian())
+    // least-significant-bit. For AAPCS we delay this offset reversal until
+    // the IR generation phase (CodeGenFunction::AdjustAAPCSBitfieldAccess),
+    // where the access parameters are recalculated.
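+    // For example, with 'short c : 7' in a 16-bit storage unit (StorageSize
+    // 16, Offset 0, Size 7), the reversed big-endian offset becomes
+    // 16 - (0 + 7) == 9, which is why the big-endian checks in
+    // test/CodeGen/aapcs-bitfield.c extract the field with 'ashr i16 %x, 9'.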
+    if (DataLayout.isBigEndian() &&
+        !Context.getTargetInfo().getABI().startswith("aapcs"))
       Info.Offset = Info.StorageSize - (Info.Offset + Info.Size);
 }
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2384,6 +2384,10 @@
   /// \brief Emit code for sections directive.
   OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S);
 
+  /// \brief Perform AAPCS-specific tweaks on bitfield loads / stores.
+  llvm::Value *AdjustAAPCSBitfieldAccess(const LValue LV, CGBitFieldInfo &Info,
+                                         bool IsLoad = true);
+
 public:
   //===--------------------------------------------------------------------===//
Index: test/CodeGen/aapcs-bitfield.c
===================================================================
--- /dev/null
+++ test/CodeGen/aapcs-bitfield.c
@@ -0,0 +1,467 @@
+// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=LE -check-prefix=CHECK
+// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=BE -check-prefix=CHECK
+
+// CHECK: %struct.st0 = type { i8, i8 }
+// CHECK: %struct.st1 = type { i16, [2 x i8] }
+// CHECK: %struct.st2 = type { i16, i8 }
+// CHECK: %struct.st3 = type { i8, i8 }
+// CHECK: %struct.st4 = type { i16, [2 x i8] }
+// CHECK: %struct.st5 = type { i16, i8 }
+// CHECK: %struct.st6 = type { i16, i8, i8 }
+// CHECK: %struct.st7b = type { i8, [3 x i8], %struct.st7a }
+// CHECK: %struct.st7a = type { i8, i8, [2 x i8] }
+// CHECK: %struct.st8 = type { i16, [2 x i8] }
+
+// A simple plain (non-volatile) bit-field.
+struct st0 {
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  short c : 7;
+};
+
+// CHECK-LABEL: st0_check_load
+int st0_check_load(struct st0 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st0_check_store
+void st0_check_store(struct st0 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+  m->c = 1;
+}
+
+// The bitfield 'c' should come straight after 'a'.
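+// Layout (LE bit numbering within the leading 16-bit container): 'a'
+// occupies bits [0..9] and 'c' occupies bits [10..15], which is why the
+// little-endian load below extracts 'c' with a plain ashr by 10.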
+struct st1 {
+  int a : 10;
+  // Expected masks (0s mark the bit-field 'c'):
+  // le: 0x03ff (1023)
+  // be: 0xffc0 (-64)
+  short c : 6;
+};
+
+// CHECK-LABEL: st1_check_load
+int st1_check_load(struct st1 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 10
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 10
+  // BE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 10
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st1_check_store
+void st1_check_store(struct st1 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 1023
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1024
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -64
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+  m->c = 1;
+}
+
+// The bit-field 'c' should begin at the third byte (the alignment of the
+// container 'short' forces this).
+struct st2 {
+  int a : 10;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  short c : 7;
+};
+
+// CHECK-LABEL: st2_check_load
+int st2_check_load(struct st2 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st2_check_store
+void st2_check_store(struct st2 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR2]], align 2
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR2]], align 2
+  m->c = 1;
+}
+
+// A simple volatile bitfield.
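+// The AAPCS additionally requires that a volatile bitfield be read and
+// written with exactly one access of the width of its container type
+// (7.1.7.5), so a single volatile i16 load / store is expected below.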
+struct st3 {
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  volatile short c : 7;
+};
+
+// CHECK-LABEL: st3_check_load
+int st3_check_load(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st3_check_store
+void st3_check_store(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+  m->c = 1;
+}
+
+// Similar to st1, but 'c' is volatile; it should come straight after 'b'.
+struct st4 {
+  int b : 9;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xc1 (-63)
+  // be: 0x83 (-125)
+  volatile char c : 5;
+};
+
+// CHECK-LABEL: st4_check_load
+int st4_check_load(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // LE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 2
+  // LE-NEXT: %[[CLR2:.*]] = ashr i8 %[[CLR1]], 3
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // BE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 1
+  // BE-NEXT: %[[CLR2:.*]] = ashr i8 %[[CLR1]], 3
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st4_check_store
+void st4_check_store(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // LE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -63
+  // LE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 2
+  // LE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR2]], align 1
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // BE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -125
+  // BE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 4
+  // BE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR2]], align 1
+  m->c = 1;
+}
+
+// Similar to st2, but 'c' is volatile; it should land in the third byte.
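+// Layout: 'a' occupies bits [0..11] of the leading 16-bit unit; 'c' lives in
+// the i8 at byte offset 2, so the aligned i8 container access below already
+// covers the whole bitfield and needs no widening.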
+struct st5 {
+  int a : 12;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xe0 (-32)
+  // be: 0x07 (7)
+  volatile char c : 5;
+};
+
+// CHECK-LABEL: st5_check_load
+int st5_check_load(struct st5 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // LE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 3
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i8 %[[CLR1]], 3
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // BE-NEXT: %[[CLR:.*]] = ashr i8 %[[LD]], 3
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st5_check_store
+void st5_check_store(struct st5 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // LE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -32
+  // LE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 1
+  // LE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR1]], align 1
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // BE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], 7
+  // BE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 8
+  // BE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR1]], align 1
+  m->c = 1;
+}
+
+// Overlapping access of bitfields and normal fields.
+struct st6 {
+  // Occupies the first two bytes.
+  // le: 0xfffff000 (-4096)
+  // be: 0x000fffff (1048575)
+  int a : 12;
+  // Occupies the third byte.
+  char b;
+  // Occupies the last byte.
+  // le: 0xe0ffffff (-520093697)
+  // be: 0xffffff07 (-249)
+  int c : 5;
+};
+
+// CHECK-LABEL: st6_check_load
+int st6_check_load(struct st6 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st6* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR1:.*]] = shl i32 %[[LD]], 20
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i32 %[[CLR1]], 20
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st6* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[LD]], 20
+  int x = m->a;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[CLR2]], %[[SEXT]]
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[SEXT]], %[[CLR1]]
+  x += m->b;
+
+  // LE: %[[CLR3:.*]] = shl i32 %[[LD]], 3
+  // LE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // LE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+
+  // BE: %[[CLR3:.*]] = shl i32 %[[LD]], 24
+  // BE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // BE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+  x += m->c;
+
+  // LE: ret i32 %[[RES2]]
+  // BE: ret i32 %[[RES2]]
+  return x;
+}
+
+// CHECK-LABEL: st6_check_store
+void st6_check_store(struct st6 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st6* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -4096
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1
+  // LE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st6* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], 1048575
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1048576
+  // BE-NEXT: store i32 %[[SET]], i32* %[[PTR1]], align 4
+  m->a = 1;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // LE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // BE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+  m->b = 2;
+
+  // LE: %[[LD2:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -520093697
+  // LE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 50331648
+  // LE-NEXT: store i32 %[[SET2]], i32* %[[PTR]], align 4
+
+  // BE: %[[LD2:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -249
+  // BE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 24
+  // BE-NEXT: store i32 %[[SET2]], i32* %[[PTR1]], align 4
+  m->c = 3;
+}
+
+// Nested structs and bitfields.
+struct st7a {
+  // Occupies the first byte.
+  char a;
+  // Occupies the second byte.
+  // le: 0xffffe0ff (-7937)
+  // be: 0xff07ffff (-16252929)
+  int b : 5;
+};
+
+struct st7b {
+  // Occupies the first byte.
+  char x;
+  // Comes after 3 bytes of padding.
+  struct st7a y;
+};
+
+// CHECK-LABEL: st7_check_load
+int st7_check_load(struct st7b *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+  int r = m->x;
+
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // LE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR1]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // LE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // BE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR1]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // BE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+  r += m->y.a;
+
+  // LE: %[[PTR2:.*]] = bitcast i8* %[[PTR1]] to i32*
+  // LE-NEXT: %[[LD2:.*]] = load i32, i32* %[[PTR2]], align 4
+  // LE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 19
+  // LE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+
+  // BE: %[[PTR2:.*]] = bitcast i8* %[[PTR1]] to i32*
+  // BE-NEXT: %[[LD2:.*]] = load i32, i32* %[[PTR2]], align 4
+  // BE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 8
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+  r += m->y.b;
+
+  // LE: ret i32 %[[RES1]]
+  // BE: ret i32 %[[RES1]]
+  return r;
+}
+
+// CHECK-LABEL: st7_check_store
+void st7_check_store(struct st7b *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // LE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}}
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+ // BE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}} + m->x = 1; + + // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0 + // LE-NEXT: store i8 2, i8* %[[PTR2]], align 4{{.*}} + + // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0 + // BE-NEXT: store i8 2, i8* %[[PTR2]], align 4{{.*}} + m->y.a = 2; + + // LE: %[[PTR3:.*]] = bitcast i8* %[[PTR2]] to i32* + // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR3]], align 4 + // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -7937 + // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 768 + // LE-NEXT: store i32 %[[SET]], i32* %[[PTR3]], align 4 + + // BE: %[[PTR3:.*]] = bitcast i8* %[[PTR2]] to i32* + // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR3]], align 4 + // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -16252929 + // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1572864 + // BE-NEXT: store i32 %[[SET]], i32* %[[PTR3]], align 4 + m->y.b = 3; +} + +// Check overflowing assignments to bitfields. +struct st8 { + unsigned f : 16; +}; + +// CHECK-LABEL: st8_check_assignment +int st8_check_assignment(struct st8 *m) { + // LE: %[[PTR:.*]] = bitcast %struct.st8* %m to i32* + // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4 + // LE-NEXT: %[[SET:.*]] = or i32 %[[LD]], 65535 + // LE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4 + // LE-NEXT: ret i32 65535 + + // BE: %[[PTR:.*]] = bitcast %struct.st8* %m to i32* + // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4 + // BE-NEXT: %[[SET:.*]] = or i32 %[[LD]], -65536 + // BE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4 + // BE-NEXT: ret i32 65535 + return m->f = 0xffffffff; +} Index: test/CodeGen/arm-bitfield-alignment.c =================================================================== --- test/CodeGen/arm-bitfield-alignment.c +++ test/CodeGen/arm-bitfield-alignment.c @@ -12,4 +12,4 @@ } // CHECK: @g = external global %struct.T, align 4 -// CHECK: %{{.*}} = load i64, i64* bitcast (%struct.T* @g to i64*), align 4 +// CHECK: %{{.*}} = load i32, i32* bitcast (%struct.T* @g to i32*), align 4
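+// Under the AAPCS access rules introduced by this patch, the bitfield is
+// accessed through the natural i32 container of its declared 'int' type, so
+// the load is no longer widened to a single i64 access spanning the whole
+// bitfield storage.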