Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -123,6 +123,11 @@
         Loc);
 }
 
+// Helper method to check if the underlying ABI is AAPCS
+static bool isAAPCS(const TargetInfo &TargetInfo) {
+  return TargetInfo.getABI().startswith("aapcs");
+}
+
 /// EmitIgnoredExpr - Emit code to compute the specified expression,
 /// ignoring the result.
 void CodeGenFunction::EmitIgnoredExpr(const Expr *E) {
@@ -1671,9 +1676,12 @@
   llvm::Value *MaskedVal = SrcVal;
 
   // See if there are other bits in the bitfield's storage we'll need to load
-  // and mask together with source before storing.
-  if (Info.StorageSize != Info.Size) {
-    assert(Info.StorageSize > Info.Size && "Invalid bitfield size.");
+  // and mask together with source before storing. Note that AAPCS dictates
+  // volatile bitfield stores must always load the bitfield container, even
+  // when the bitfield fills the entire container.
+  if (Info.StorageSize != Info.Size ||
+      (isAAPCS(CGM.getTarget()) && Dst.isVolatileQualified())) {
+    assert(Info.StorageSize >= Info.Size && "Invalid bitfield size.");
     llvm::Value *Val =
       Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load");
 
@@ -3100,6 +3108,75 @@
   return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName());
 }
 
+// AAPCS requires bitfield accesses to be performed using the natural
+// alignment / width of the container type. This requirement is at odds with
+// how clang works with record types. For example, consider:
+//   struct s {
+//     char a;
+//     short b : 8;
+//   };
+// Here, clang treats 'a' and 'b' as two separate (consecutive) storage units,
+// each having a size of 8 bits. This means an access to the bitfield 'b' will
+// use an i8* instead of an i16* (the type of the container) as required by the
+// AAPCS. In other words, AAPCS allows the storage of 'a' and 'b' to overlap
+// (7.1.7.4); a load of 'b' will load 'a' as well, which is then masked out as
+// necessary. Modifying clang's abstraction of structures to allow overlapping
+// fields would be quite intrusive; we have therefore resorted to the following
+// tweak, where we intercept bitfield loads / stores and adjust them so that
+// they are AAPCS compliant. Note that both clang and AAPCS already agree on
+// the layout of structs; it is only the access width / alignment that needs
+// fixing.
+Address CodeGenFunction::AdjustAAPCSBitfieldLValue(Address Base,
+                                                   QualType FieldType,
+                                                   CGBitFieldInfo &Info,
+                                                   bool &Adjusted) {
+  llvm::Type *ResLTy = ConvertTypeForMem(FieldType);
+
+  // CGRecordLowering::setBitFieldInfo() pre-adjusts the bitfield offsets for
+  // big-endian targets, but it assumes a container of width Info.StorageSize.
+  // Since AAPCS uses a different container size (the width of the type), we
+  // first undo that calculation here and redo it once the bitfield offset
+  // within the new container is calculated.
+  if (CGM.getTypes().getDataLayout().isBigEndian())
+    Info.Offset = Info.StorageSize - (Info.Offset + Info.Size);
+
+  // Offset to the bitfield from the beginning of the struct
+  uint32_t AbsoluteOffset = getContext().toBits(Info.StorageOffset) +
+                            Info.Offset;
+
+  // Container size is the width of the bitfield type
+  uint32_t ContainerSize = ResLTy->getPrimitiveSizeInBits();
+
+  // Offset within the container
+  uint32_t MemberOffset = AbsoluteOffset & (ContainerSize - 1);
+
+  // Bail out if an aligned load of the container cannot cover the entire
+  // bitfield. This can happen, for example, if the bitfield is part of a
+  // packed struct. AAPCS does not define access rules for such cases, so we
+  // let clang follow its own rules.
+  if (MemberOffset + Info.Size > ContainerSize)
+    return Base;
+
+  // Re-adjust offsets for big-endian targets
+  if (CGM.getTypes().getDataLayout().isBigEndian())
+    MemberOffset = ContainerSize - (MemberOffset + Info.Size);
+
+  // No turning back after this point
+  Adjusted = true;
+
+  // Calculate the new bitfield access parameters
+  Info.StorageOffset =
+      getContext().toCharUnitsFromBits(AbsoluteOffset & ~(ContainerSize - 1));
+  Info.StorageSize = ContainerSize;
+  Info.Offset = MemberOffset;
+
+  // GEP into the bitfield container. Here we essentially treat the Base as a
+  // pointer to a block of containers and index into it appropriately
+  return Builder.CreateConstInBoundsGEP(
+      Builder.CreateElementBitCast(Base, ResLTy),
+      AbsoluteOffset / ContainerSize,
+      getContext().toCharUnitsFromBits(ContainerSize));
+}
+
 LValue CodeGenFunction::EmitLValueForField(LValue base,
                                            const FieldDecl *field) {
   AlignmentSource fieldAlignSource =
@@ -3108,9 +3185,19 @@
   if (field->isBitField()) {
     const CGRecordLayout &RL =
       CGM.getTypes().getCGRecordLayout(field->getParent());
-    const CGBitFieldInfo &Info = RL.getBitFieldInfo(field);
+    CGBitFieldInfo Info = RL.getBitFieldInfo(field);
     Address Addr = base.getAddress();
     unsigned Idx = RL.getLLVMFieldNo(field);
+    QualType fieldType =
+        field->getType().withCVRQualifiers(base.getVRQualifiers());
+
+    bool Adjusted = false;
+    if (isAAPCS(CGM.getTarget()) && fieldType.isVolatileQualified()) {
+      Addr = AdjustAAPCSBitfieldLValue(Addr, fieldType, Info, Adjusted);
+      if (Adjusted)
+        return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource);
+    }
+
     if (Idx != 0)
       // For structs, we GEP to the field that the record layout suggests.
       Addr = Builder.CreateStructGEP(Addr, Idx, Info.StorageOffset,
@@ -3121,8 +3208,6 @@
     if (Addr.getElementType() != FieldIntTy)
       Addr = Builder.CreateElementBitCast(Addr, FieldIntTy);
 
-    QualType fieldType =
-      field->getType().withCVRQualifiers(base.getVRQualifiers());
     return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource);
   }
 
Index: lib/CodeGen/CGValue.h
===================================================================
--- lib/CodeGen/CGValue.h
+++ lib/CodeGen/CGValue.h
@@ -20,6 +20,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/Type.h"
 #include "Address.h"
+#include "CGRecordLayout.h"
 
 namespace llvm {
   class Constant;
@@ -29,7 +30,6 @@
 namespace clang {
 namespace CodeGen {
   class AggValueSlot;
-  struct CGBitFieldInfo;
 
 /// RValue - This trivial value class is used to represent the result of an
 /// expression that is evaluated. It can be one of three things: either a
@@ -166,11 +166,11 @@
     // ExtVector element subset: V.xyx
     llvm::Constant *VectorElts;
-
-    // BitField start bit and size
-    const CGBitFieldInfo *BitFieldInfo;
   };
 
+  // BitField start bit and size
+  CGBitFieldInfo BitFieldInfo;
+
   QualType Type;
 
   // 'const' is unused here
@@ -362,7 +362,7 @@
   llvm::Value *getBitFieldPointer() const { assert(isBitField()); return V; }
   const CGBitFieldInfo &getBitFieldInfo() const {
     assert(isBitField());
-    return *BitFieldInfo;
+    return BitFieldInfo;
   }
 
   // global register lvalue
@@ -418,7 +418,7 @@
     LValue R;
     R.LVType = BitField;
     R.V = Addr.getPointer();
-    R.BitFieldInfo = &Info;
+    R.BitFieldInfo = Info;
     R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), alignSource);
     return R;
   }
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2384,6 +2384,12 @@
   /// \brief Emit code for sections directive.
   OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S);
 
+  /// \brief Perform AAPCS specific tweaks on bitfield accesses.
+  Address AdjustAAPCSBitfieldLValue(Address Base,
+                                    QualType FieldType,
+                                    CGBitFieldInfo &Info,
+                                    bool &Adjusted);
+
 public:
 
   //===--------------------------------------------------------------------===//
Index: test/CodeGen/aapcs-bitfield.c
===================================================================
--- /dev/null
+++ test/CodeGen/aapcs-bitfield.c
@@ -0,0 +1,353 @@
+// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=LE -check-prefix=CHECK
+// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=BE -check-prefix=CHECK
+
+// CHECK: %struct.st1 = type { i8, i8 }
+// CHECK: %struct.st2 = type { i16, [2 x i8] }
+// CHECK: %struct.st3 = type { i16, i8 }
+// CHECK: %struct.st4 = type { i16, i8, i8 }
+// CHECK: %struct.st5b = type { i8, [3 x i8], %struct.st5a }
+// CHECK: %struct.st5a = type { i8, i8, [2 x i8] }
+// CHECK: %struct.st6 = type { i16, [2 x i8] }
+
+// A simple volatile bitfield; it should be accessed through an i16*
+struct st1 {
+  // Expected masks (0s mark the bit-field):
+  //  le: 0xff80 (-128)
+  //  be: 0x01ff (511)
+  volatile short c : 7;
+};
+
+// CHECK-LABEL: st1_check_load
+int st1_check_load(struct st1 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st1* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st1* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st1_check_store
+void st1_check_store(struct st1 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st1* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st1* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+  m->c = 1;
+}
+
+// 'c' should land straight after 'b' and should be accessed through an i16*
+struct st2 {
+  int b : 10;
+  // Expected masks (0s mark the bit-field):
+  //  le: 0x03ff (1023)
+  //  be: 0xffc0 (-64)
+  volatile short c : 6;
+};
+
+// CHECK-LABEL: st2_check_load
+int st2_check_load(struct st2 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 10
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR]], align 4
+  // BE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 10
+  // BE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 10
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st2_check_store
+void st2_check_store(struct st2 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 1023
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1024
+  // LE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR]], align 4
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -64
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // BE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR]], align 4
+  m->c = 1;
+}
+
+// 'c' should land in the third byte and should be accessed through an i16*
+struct st3 {
+  int a : 10;
+  // Expected masks (0s mark the bit-field):
+  //  le: 0xff80 (-128)
+  //  be: 0x01ff (511)
+  volatile short c : 7;
+};
+
+// CHECK-LABEL: st3_check_load
+int st3_check_load(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st3, %struct.st3* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st3, %struct.st3* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st3_check_store
+void st3_check_store(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st3, %struct.st3* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR2]], align 2
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st3, %struct.st3* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR2]], align 2
+  m->c = 1;
+}
+
+// Volatile bitfields whose container accesses overlap normal fields
+struct st4 {
+  // Occupies the first two bytes
+  //  le: 0xfffff000 (-4096)
+  //  be: 0x000fffff (1048575)
+  volatile int a : 12;
+  // Occupies the third byte
+  char b;
+  // Occupies the last byte
+  //  le: 0xe0ffffff (-520093697)
+  //  be: 0xffffff07 (-249)
+  volatile int c : 5;
+};
+
+// CHECK-LABEL: st4_check_load
+int st4_check_load(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // LE-NEXT: %[[CLR1:.*]] = shl i32 %[[LD]], 20
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i32 %[[CLR1]], 20
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[LD]], 20
+  int x = m->a;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st4, %struct.st4* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[CLR2]], %[[SEXT]]
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st4, %struct.st4* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[SEXT]], %[[CLR1]]
+  x += m->b;
+
+  // LE: %[[LD3:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // LE-NEXT: %[[CLR3:.*]] = shl i32 %[[LD3]], 3
+  // LE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // LE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+
+  // BE: %[[LD3:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR3:.*]] = shl i32 %[[LD3]], 24
+  // BE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // BE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+  x += m->c;
+
+  // LE: ret i32 %[[RES2]]
+  // BE: ret i32 %[[RES2]]
+  return x;
+}
+
+// CHECK-LABEL: st4_check_store
+void st4_check_store(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -4096
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1
+  // LE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR1]], align 4
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], 1048575
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1048576
+  // BE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR1]], align 4
+  m->a = 1;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st4, %struct.st4* %m, i32 0, i32 1
+  // LE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st4, %struct.st4* %m, i32 0, i32 1
+  // BE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+  m->b = 2;
+
+  // LE: %[[LD2:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // LE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -520093697
+  // LE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 50331648
+  // LE-NEXT: store volatile i32 %[[SET2]], i32* %[[PTR1]], align 4
+
+  // BE: %[[LD2:.*]] = load volatile i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -249
+  // BE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 24
+  // BE-NEXT: store volatile i32 %[[SET2]], i32* %[[PTR1]], align 4
+  m->c = 3;
+}
+
+// Nested structs and volatile bitfields
+struct st5a {
+  // Occupies the first byte
+  char a;
+  // Occupies the second byte
+  //  le: 0xffffe0ff (-7937)
+  //  be: 0xff07ffff (-16252929)
+  volatile int b : 5;
+};
+
+struct st5b {
+  // Occupies the first byte
+  char x;
+  // Comes after 3 bytes of padding
+  struct st5a y;
+};
+
+// CHECK-LABEL: st7_check_load
+int st7_check_load(struct st5b *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+  int r = m->x;
+
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 2
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds %struct.st5a, %struct.st5a* %[[PTR1]], i32 0, i32 0
+  // LE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR2]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // LE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 2
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds %struct.st5a, %struct.st5a* %[[PTR1]], i32 0, i32 0
+  // BE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR2]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // BE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+  r += m->y.a;
+
+  // LE: %[[PTR3:.*]] = bitcast %struct.st5a* %[[PTR1]] to i32*
+  // LE-NEXT: %[[LD2:.*]] = load volatile i32, i32* %[[PTR3]], align 4
+  // LE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 19
+  // LE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+
+  // BE: %[[PTR3:.*]] = bitcast %struct.st5a* %[[PTR1]] to i32*
+  // BE-NEXT: %[[LD2:.*]] = load volatile i32, i32* %[[PTR3]], align 4
+  // BE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 8
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+  r += m->y.b;
+
+  // LE: ret i32 %[[RES1]]
+  // BE: ret i32 %[[RES1]]
+  return r;
+}
+
+// CHECK-LABEL: st7_check_store
+void st7_check_store(struct st5b *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 0
+  // LE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}}
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 0
+  // BE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}}
+  m->x = 1;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 2
+  // LE-NEXT: %[[PTR3:.*]] = getelementptr inbounds %struct.st5a, %struct.st5a* %[[PTR2]], i32 0, i32 0
+  // LE-NEXT: store i8 2, i8* %[[PTR3]], align 4{{.*}}
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st5b, %struct.st5b* %m, i32 0, i32 2
+  // BE-NEXT: %[[PTR3:.*]] = getelementptr inbounds %struct.st5a, %struct.st5a* %[[PTR2]], i32 0, i32 0
+  // BE-NEXT: store i8 2, i8* %[[PTR3]], align 4{{.*}}
+  m->y.a = 2;
+
+  // LE: %[[PTR3:.*]] = bitcast %struct.st5a* %[[PTR2]] to i32*
+  // LE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR3]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -7937
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 768
+  // LE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR3]], align 4
+
+  // BE: %[[PTR3:.*]] = bitcast %struct.st5a* %[[PTR2]] to i32*
+  // BE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR3]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -16252929
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1572864
+  // BE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR3]], align 4
+  m->y.b = 3;
+}
+
+// Check overflowing assignments to volatile bitfields
+struct st6 {
+  volatile unsigned f : 16;
+};
+
+// CHECK-LABEL: st6_check_assignment
+int st6_check_assignment(struct st6 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st6* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[LD]], 65535
+  // LE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR]], align 4
+  // LE-NEXT: ret i32 65535
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st6* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load volatile i32, i32* %[[PTR]], align 4
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[LD]], -65536
+  // BE-NEXT: store volatile i32 %[[SET]], i32* %[[PTR]], align 4
+  // BE-NEXT: ret i32 65535
+  return m->f = 0xffffffff;
+}
+
+// Check that volatile bitfield stores load the container, even when the
+// bitfield occupies the entire container
+struct st9 {
+  volatile short a : 16;
+};
+
+// CHECK-LABEL: st9_check_full_volatile_store
+void st9_check_full_volatile_store(struct st9 *m) {
+  // CHECK: %[[PTR:.*]] = getelementptr inbounds %struct.st9, %struct.st9* %m, i32 0, i32 0
+  // CHECK-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR]], align 2
+  // CHECK-NEXT: store volatile i16 15, i16* %[[PTR]], align 2
+  m->a = 0xf;
+}
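To make the container arithmetic in AdjustAAPCSBitfieldLValue easier to follow, here is a minimal standalone C++ sketch of the same calculation, covering only the little-endian path (the big-endian offset re-adjustment is not modelled). It is not part of the patch; the type BitFieldAccess and the function adjustForAAPCS are hypothetical names used purely for illustration, and the two assertions reproduce the st2 and st3 cases exercised by the tests above.

// Standalone illustration of the AAPCS bitfield container adjustment
// (little-endian only); names here are hypothetical, not part of clang.
#include <cassert>
#include <cstdint>

struct BitFieldAccess {
  uint32_t StorageOffsetBits; // start of the adjusted container, in bits
  uint32_t StorageSize;       // container width, in bits
  uint32_t Offset;            // bitfield offset within the container, in bits
};

// AbsoluteOffset: bit offset of the bitfield from the start of the struct.
// Size: declared width of the bitfield, in bits.
// ContainerSize: width of the bitfield's declared type, in bits (power of two).
// Returns false when an aligned container load cannot cover the bitfield
// (e.g. in a packed struct), mirroring the bail-out case in the patch.
static bool adjustForAAPCS(uint32_t AbsoluteOffset, uint32_t Size,
                           uint32_t ContainerSize, BitFieldAccess &Out) {
  uint32_t MemberOffset = AbsoluteOffset & (ContainerSize - 1);
  if (MemberOffset + Size > ContainerSize)
    return false;
  Out.StorageOffsetBits = AbsoluteOffset & ~(ContainerSize - 1);
  Out.StorageSize = ContainerSize;
  Out.Offset = MemberOffset;
  return true;
}

int main() {
  BitFieldAccess A;

  // struct st3 { int a : 10; volatile short c : 7; }: 'c' starts at bit 16,
  // so the adjusted access loads the second i16 of the struct
  // (StorageOffsetBits == 16) and finds 'c' at bit 0, matching the
  // "getelementptr ... i32 1" and the 0xff80 mask in st3_check_store.
  assert(adjustForAAPCS(/*AbsoluteOffset=*/16, /*Size=*/7,
                        /*ContainerSize=*/16, A));
  assert(A.StorageOffsetBits == 16 && A.StorageSize == 16 && A.Offset == 0);

  // struct st2 { int b : 10; volatile short c : 6; }: 'c' starts at bit 10,
  // so the first i16 is loaded and 'c' sits at bit offset 10, which is why
  // st2_check_store masks with 1023 (0x03ff) before or-ing in the new value.
  assert(adjustForAAPCS(/*AbsoluteOffset=*/10, /*Size=*/6,
                        /*ContainerSize=*/16, A));
  assert(A.StorageOffsetBits == 0 && A.StorageSize == 16 && A.Offset == 10);

  return 0;
}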