diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -415,6 +415,11 @@
   llvm_unreachable("unknown storage duration");
 }
 
+/// Helper method to check if the underlying ABI is AAPCS.
+static bool isAAPCS(const TargetInfo &TargetInfo) {
+  return TargetInfo.getABI().startswith("aapcs");
+}
+
 LValue CodeGenFunction::
 EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
   const Expr *E = M->getSubExpr();
@@ -1830,20 +1835,24 @@
   Address Ptr = LV.getBitFieldAddress();
   llvm::Value *Val =
       Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load");
 
+  bool UseVolatile = LV.isVolatileQualified() && Info.VolatileStorageSize != 0 &&
+                     isAAPCS(CGM.getTarget());
+  const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset;
+  const unsigned StorageSize =
+      UseVolatile ? Info.VolatileStorageSize : Info.StorageSize;
   if (Info.IsSigned) {
-    assert(static_cast<unsigned>(Info.Offset + Info.Size) <= Info.StorageSize);
-    unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size;
+    assert(static_cast<unsigned>(Offset + Info.Size) <= StorageSize);
+    unsigned HighBits = StorageSize - Offset - Info.Size;
     if (HighBits)
       Val = Builder.CreateShl(Val, HighBits, "bf.shl");
-    if (Info.Offset + HighBits)
-      Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr");
+    if (Offset + HighBits)
+      Val = Builder.CreateAShr(Val, Offset + HighBits, "bf.ashr");
   } else {
-    if (Info.Offset)
-      Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr");
-    if (static_cast<unsigned>(Info.Offset) + Info.Size < Info.StorageSize)
-      Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize,
-                                                              Info.Size),
-                              "bf.clear");
+    if (Offset)
+      Val = Builder.CreateLShr(Val, Offset, "bf.lshr");
+    if (static_cast<unsigned>(Offset) + Info.Size < StorageSize)
+      Val = Builder.CreateAnd(
+          Val, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), "bf.clear");
   }
   Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast");
   EmitScalarRangeCheck(Val, LV.getType(), Loc);
@@ -2038,34 +2047,39 @@
                                              /*isSigned=*/false);
   llvm::Value *MaskedVal = SrcVal;
 
+  const bool UseVolatile = Dst.isVolatileQualified() &&
+                           Info.VolatileStorageSize != 0 &&
+                           isAAPCS(CGM.getTarget());
+  const unsigned StorageSize =
+      UseVolatile ? Info.VolatileStorageSize : Info.StorageSize;
+  const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset;
   // See if there are other bits in the bitfield's storage we'll need to load
   // and mask together with source before storing.
-  if (Info.StorageSize != Info.Size) {
-    assert(Info.StorageSize > Info.Size && "Invalid bitfield size.");
-    llvm::Value *Val =
-        Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load");
+  if (StorageSize != Info.Size) {
+    assert(StorageSize > Info.Size && "Invalid bitfield size.");
+    llvm::Value *Val = Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load");
 
     // Mask the source value as needed.
     if (!hasBooleanRepresentation(Dst.getType()))
       SrcVal = Builder.CreateAnd(SrcVal,
-                                 llvm::APInt::getLowBitsSet(Info.StorageSize,
+                                 llvm::APInt::getLowBitsSet(StorageSize,
                                                             Info.Size),
                                  "bf.value");
     MaskedVal = SrcVal;
-    if (Info.Offset)
-      SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl");
+    if (Offset)
+      SrcVal = Builder.CreateShl(SrcVal, Offset, "bf.shl");
 
     // Mask out the original value.
     Val = Builder.CreateAnd(Val,
-                            ~llvm::APInt::getBitsSet(Info.StorageSize,
-                                                     Info.Offset,
-                                                     Info.Offset + Info.Size),
+                            ~llvm::APInt::getBitsSet(StorageSize,
+                                                     Offset,
+                                                     Offset + Info.Size),
                             "bf.clear");
 
     // Or together the unchanged values and the source value.
     SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set");
   } else {
-    assert(Info.Offset == 0);
+    assert(Offset == 0);
   }
 
   // Write the new value back out.
@@ -2077,8 +2091,8 @@
   // Sign extend the value if needed.
   if (Info.IsSigned) {
-    assert(Info.Size <= Info.StorageSize);
-    unsigned HighBits = Info.StorageSize - Info.Size;
+    assert(Info.Size <= StorageSize);
+    unsigned HighBits = StorageSize - Info.Size;
     if (HighBits) {
       ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl");
       ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr");
@@ -4060,29 +4074,41 @@
   if (field->isBitField()) {
     const CGRecordLayout &RL =
-      CGM.getTypes().getCGRecordLayout(field->getParent());
+        CGM.getTypes().getCGRecordLayout(field->getParent());
     const CGBitFieldInfo &Info = RL.getBitFieldInfo(field);
+    const bool UseVolatile = isAAPCS(CGM.getTarget()) &&
+                             Info.VolatileStorageSize != 0 &&
+                             field->getType()
+                                 .withCVRQualifiers(base.getVRQualifiers())
+                                 .isVolatileQualified();
     Address Addr = base.getAddress(*this);
     unsigned Idx = RL.getLLVMFieldNo(field);
    const RecordDecl *rec = field->getParent();
-    if (!IsInPreservedAIRegion &&
-        (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
-      if (Idx != 0)
-        // For structs, we GEP to the field that the record layout suggests.
-        Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
-    } else {
-      llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType(
-          getContext().getRecordType(rec), rec->getLocation());
-      Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx,
-          getDebugInfoFIndex(rec, field->getFieldIndex()),
-          DbgInfo);
+    if (!UseVolatile) {
+      if (!IsInPreservedAIRegion &&
+          (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
+        if (Idx != 0)
+          // For structs, we GEP to the field that the record layout suggests.
+          Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
+      } else {
+        llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType(
+            getContext().getRecordType(rec), rec->getLocation());
+        Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx,
+            getDebugInfoFIndex(rec, field->getFieldIndex()),
+            DbgInfo);
+      }
     }
-
+    const unsigned SS = UseVolatile ? Info.VolatileStorageSize : Info.StorageSize;
     // Get the access type.
-    llvm::Type *FieldIntTy =
-        llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize);
+    llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS);
     if (Addr.getElementType() != FieldIntTy)
       Addr = Builder.CreateElementBitCast(Addr, FieldIntTy);
+    if (UseVolatile) {
+      const unsigned VolatileOffset =
+          Info.VolatileStorageOffset.getQuantity();
+      if (VolatileOffset)
+        Addr = Builder.CreateConstInBoundsGEP(Addr, VolatileOffset);
+    }
 
     QualType fieldType =
         field->getType().withCVRQualifiers(base.getVRQualifiers());
diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h
--- a/clang/lib/CodeGen/CGRecordLayout.h
+++ b/clang/lib/CodeGen/CGRecordLayout.h
@@ -80,13 +80,30 @@
   /// The offset of the bitfield storage from the start of the struct.
   CharUnits StorageOffset;
 
+  /// The offset within a contiguous run of bitfields that are represented as
+  /// a single "field" within the LLVM struct type. This offset is in bits.
+  unsigned VolatileOffset : 16;
+
+  /// The storage size in bits which should be used when accessing this
+  /// bitfield.
+  unsigned VolatileStorageSize;
+
+  /// The offset of the bitfield storage from the start of the struct.
+  CharUnits VolatileStorageOffset;
+
   CGBitFieldInfo()
-      : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset() {}
+      : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(),
+        VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {}
 
   CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned,
-                 unsigned StorageSize, CharUnits StorageOffset)
+                 unsigned StorageSize, CharUnits StorageOffset,
+                 unsigned VolatileOffset, unsigned VolatileStorageSize,
+                 CharUnits VolatileStorageOffset)
       : Offset(Offset), Size(Size), IsSigned(IsSigned),
-        StorageSize(StorageSize), StorageOffset(StorageOffset) {}
+        StorageSize(StorageSize), StorageOffset(StorageOffset),
+        VolatileOffset(VolatileOffset),
+        VolatileStorageSize(VolatileStorageSize),
+        VolatileStorageOffset(VolatileStorageOffset) {}
 
   void print(raw_ostream &OS) const;
   void dump() const;
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -109,6 +109,14 @@
     D->isMsStruct(Context);
   }
 
+  /// Helper function to check if we are targeting AAPCS.
+  bool isAAPCS() const {
+    return Context.getTargetInfo().getABI().startswith("aapcs");
+  }
+
+  /// Helper function to check if the target machine is big-endian.
+  bool isBE() const { return Context.getTargetInfo().isBigEndian(); }
+
   /// The Itanium base layout rule allows virtual bases to overlap
   /// other bases, which complicates layout in specific ways.
   ///
@@ -173,6 +181,7 @@
   void accumulateFields();
   void accumulateBitFields(RecordDecl::field_iterator Field,
                            RecordDecl::field_iterator FieldEnd);
+  void computeVolatileBitfields();
   void accumulateBases();
   void accumulateVPtrs();
   void accumulateVBases();
@@ -237,6 +246,9 @@
   // least-significant-bit.
   if (DataLayout.isBigEndian())
     Info.Offset = Info.StorageSize - (Info.Offset + Info.Size);
+  Info.VolatileStorageSize = 0;
+  Info.VolatileOffset = 0;
+  Info.VolatileStorageOffset = CharUnits::Zero();
 }
 
 void CGRecordLowering::lower(bool NVBaseType) {
@@ -261,15 +273,19 @@
   //  8) Format the complete list of members in a way that can be consumed by
   //     CodeGenTypes::ComputeRecordLayout.
   CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize();
-  if (D->isUnion())
-    return lowerUnion();
+  if (D->isUnion()) {
+    lowerUnion();
+    return computeVolatileBitfields();
+  }
   accumulateFields();
   // RD implies C++.
   if (RD) {
     accumulateVPtrs();
     accumulateBases();
-    if (Members.empty())
-      return appendPaddingBytes(Size);
+    if (Members.empty()) {
+      appendPaddingBytes(Size);
+      return computeVolatileBitfields();
+    }
     if (!NVBaseType)
       accumulateVBases();
   }
@@ -281,6 +297,7 @@
   Members.pop_back();
   calculateZeroInit();
   fillOutputFields();
+  computeVolatileBitfields();
 }
 
 void CGRecordLowering::lowerUnion() {
@@ -500,6 +517,108 @@
   }
 }
 
+/// Volatile bit-fields may have access regulations imposed by the ABI. The
+/// AAPCS, for instance, defines that, when possible, bit-fields should be
+/// accessed using containers of the declared type width:
+/// Volatile bit-fields – preserving number and width of container accesses:
+/// When a volatile bit-field is read, and its container does not overlap
+/// with any non-bit-field member, its container must be read exactly once
+/// using the access width appropriate to the type of the container.
+/// When a volatile bit-field is written, and its container does not overlap
+/// with any non-bit-field member, its container must be read exactly once and
+/// written exactly once using the access width appropriate to the type of the
+/// container. The two accesses are not atomic.
+void CGRecordLowering::computeVolatileBitfields() {
+  if (!isAAPCS())
+    return;
+
+  for (auto &I : BitFields) {
+    const FieldDecl *Field = I.first;
+    CGBitFieldInfo &Info = I.second;
+    llvm::Type *ResLTy = Types.ConvertTypeForMem(Field->getType());
+    // CGRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets
+    // for big-endian targets, but it assumes a container of width
+    // Info.StorageSize. Since AAPCS uses a different container size (the
+    // width of the type), we first undo that calculation here and redo it
+    // once the bit-field offset within the new container is calculated.
+    const unsigned OldOffset =
+        isBE() ? Info.StorageSize - (Info.Offset + Info.Size) : Info.Offset;
+    // Offset to the bit-field from the beginning of the struct.
+    const unsigned AbsoluteOffset =
+        Context.toBits(Info.StorageOffset) + OldOffset;
+
+    // The container size is the width of the bit-field type.
+    const unsigned StorageSize = ResLTy->getPrimitiveSizeInBits();
+    // Nothing to do if the access uses the desired container width and is
+    // naturally aligned.
+    if (Info.StorageSize == StorageSize && (OldOffset % StorageSize == 0))
+      continue;
+
+    // Offset within the container.
+    unsigned Offset = AbsoluteOffset & (StorageSize - 1);
+    // Bail out if an aligned load of the container cannot cover the entire
+    // bit-field. This can happen, for example, if the bit-field is part of a
+    // packed struct. AAPCS does not define access rules for such cases, so we
+    // let clang follow its own rules.
+    if (Offset + Info.Size > StorageSize)
+      continue;
+
+    // Re-adjust the offset for big-endian targets.
+    if (isBE())
+      Offset = StorageSize - (Offset + Info.Size);
+
+    const CharUnits StorageOffset =
+        Context.toCharUnitsFromBits(AbsoluteOffset & ~(StorageSize - 1));
+    const CharUnits End = StorageOffset +
+                          Context.toCharUnitsFromBits(StorageSize) -
+                          CharUnits::One();
+
+    const ASTRecordLayout &Layout =
+        Context.getASTRecordLayout(Field->getParent());
+    // If the access would reach memory outside the record, bail out.
+    const CharUnits RecordSize = Layout.getSize();
+    if (End >= RecordSize)
+      continue;
+
+    // Bail out if performing this load would access non-bit-field members.
+    bool Conflict = false;
+    for (const auto &it : Fields) {
+      const FieldDecl *F = it.first;
+      // Allow overlap with other bit-fields.
+      if (F->isBitField())
+        continue;
+      const CharUnits FOffset = Context.toCharUnitsFromBits(
+          Layout.getFieldOffset(F->getFieldIndex()));
+      const CharUnits FEnd =
+          FOffset +
+          Context.toCharUnitsFromBits(
+              Types.ConvertTypeForMem(F->getType())->getPrimitiveSizeInBits()) -
+          CharUnits::One();
+      // The other field starts after the desired load end.
+      if (End < FOffset)
+        break;
+
+      // The other field ends before the desired load offset.
+      if (FEnd < StorageOffset)
+        continue;
+
+      // The desired load overlaps a non-bit-field member; bail out.
+      Conflict = true;
+      break;
+    }
+
+    if (Conflict)
+      continue;
+    // Write the new bitfield access parameters.
+    // As the storage offset is now defined as the number of elements from the
+    // start of the structure, we divide the Offset by the element size.
+    Info.VolatileStorageOffset = StorageOffset /
+        Context.toCharUnitsFromBits(StorageSize).getQuantity();
+    Info.VolatileStorageSize = StorageSize;
+    Info.VolatileOffset = Offset;
+  }
+}
+
 void CGRecordLowering::accumulateVPtrs() {
   if (Layout.hasOwnVFPtr())
     Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr,
@@ -726,7 +845,8 @@
     Offset = StorageSize - (Offset + Size);
   }
 
-  return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageOffset);
+  return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageOffset,
+                        Offset, StorageSize, StorageOffset);
 }
 
 CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D,
@@ -845,8 +965,10 @@
       assert(Info.StorageSize <= SL->getSizeInBits() &&
              "Union not large enough for bitfield storage");
     } else {
-      assert(Info.StorageSize ==
-             getDataLayout().getTypeAllocSizeInBits(ElementTy) &&
+      assert((Info.StorageSize ==
+                  getDataLayout().getTypeAllocSizeInBits(ElementTy) ||
+              Info.VolatileStorageSize ==
+                  getDataLayout().getTypeAllocSizeInBits(ElementTy)) &&
              "Storage size does not match the element type size");
     }
     assert(Info.Size > 0 && "Empty bitfield!");
@@ -894,11 +1016,12 @@
 
 void CGBitFieldInfo::print(raw_ostream &OS) const {
   OS << "<CGBitFieldInfo"
-     << " Offset:" << Offset
-     << " Size:" << Size
-     << " IsSigned:" << IsSigned << "\n"
+     << " Offset:" << Offset << " Size:" << Size << " IsSigned:" << IsSigned
      << " StorageSize:" << StorageSize
-     << " StorageOffset:" << StorageOffset.getQuantity() << ">";
+     << " StorageOffset:" << StorageOffset.getQuantity()
+     << " VolatileOffset:" << VolatileOffset
+     << " VolatileStorageSize:" << VolatileStorageSize
+     << " VolatileStorageOffset:" << VolatileStorageOffset.getQuantity() << ">";
 }
 
 LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const {
diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c
--- a/clang/test/CodeGen/aapcs-bitfield.c
+++ b/clang/test/CodeGen/aapcs-bitfield.c
@@ -151,19 +151,19 @@
 // LE-LABEL: @st3_check_load(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
-// LE-NEXT:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
-// LE-NEXT:    [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
-// LE-NEXT:    [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
+// LE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
+// LE-NEXT:    [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9
+// LE-NEXT:    [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
 // LE-NEXT:    ret i32 [[CONV]]
 //
 // BE-LABEL: @st3_check_load(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
-// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
-// BE-NEXT:    [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
+// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9
+// BE-NEXT:    [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
 // BE-NEXT:    ret i32 [[CONV]]
 //
 int st3_check_load(struct st3 *m) {
@@ -172,20 +172,20 @@
 // LE-LABEL: @st3_check_store(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
-// LE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
-// LE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
-// LE-NEXT:    store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128
+// LE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
+// LE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @st3_check_store(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
-// BE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
-// BE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
-// BE-NEXT:    store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511
+// BE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
+// BE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
 // BE-NEXT:    ret void
 //
 void st3_check_store(struct st3 *m) {
@@ -199,24 +199,22 @@
 // LE-LABEL: @st4_check_load(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2
-// LE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
-// LE-NEXT:    [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
-// LE-NEXT:    [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
-// LE-NEXT:    [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
+// LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
+// LE-NEXT:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2
+// LE-NEXT:    [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
+// LE-NEXT:    [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
 // LE-NEXT:    ret i32 [[CONV]]
 //
 // BE-LABEL: @st4_check_load(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
-// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
-// BE-NEXT:    [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
-// BE-NEXT:    [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
-// BE-NEXT:    [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
+// BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
+// BE-NEXT:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
+// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
+// BE-NEXT:    [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
 // BE-NEXT:    ret i32 [[CONV]]
 //
 int st4_check_load(struct st4 *m) {
@@ -225,20 +223,22 @@
 // LE-LABEL: @st4_check_store(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873
-// LE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
-// LE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
+// LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63
+// LE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
+// LE-NEXT:    store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @st4_check_store(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125
-// BE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4
-// BE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
+// BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125
+// BE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4
+// BE-NEXT:    store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
 // BE-NEXT:    ret void
 //
 void st4_check_store(struct st4 *m) {
@@ -324,16 +324,16 @@
 // LE-LABEL: @st6_check_load(
 // LE-NEXT:  entry:
 // LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
 // LE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4
 // LE-NEXT:    [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4
 // LE-NEXT:    [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
 // LE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
-// LE-NEXT:    [[TMP1:%.*]] = load i8, i8* [[B]], align 2, !tbaa !3
+// LE-NEXT:    [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
 // LE-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 // LE-NEXT:    [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
 // LE-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
-// LE-NEXT:    [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
 // LE-NEXT:    [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3
 // LE-NEXT:    [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3
 // LE-NEXT:    [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32
@@ -343,21 +343,21 @@
 // BE-LABEL: @st6_check_load(
 // BE-NEXT:  entry:
 // BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
 // BE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4
 // BE-NEXT:    [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
 // BE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
-// BE-NEXT:    [[TMP1:%.*]] = load i8, i8* [[B]], align 2, !tbaa !3
+// BE-NEXT:    [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
 // BE-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 // BE-NEXT:    [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
 // BE-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
-// BE-NEXT:    [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
 // BE-NEXT:    [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3
 // BE-NEXT:    [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
 // BE-NEXT:    [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]]
 // BE-NEXT:    ret i32 [[ADD4]]
 //
-int st6_check_load(struct st6 *m) {
+int st6_check_load(volatile struct st6 *m) {
   int x = m->a;
   x += m->b;
   x += m->c;
@@ -410,7 +410,7 @@
 struct st7b {
   char x;
-  struct st7a y;
+  volatile struct st7a y;
 };
 
 // LE-LABEL: @st7_check_load(
@@ -419,11 +419,11 @@
 // LE-NEXT:    [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
 // LE-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
 // LE-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
-// LE-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A]], align 4, !tbaa !11
+// LE-NEXT:    [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
 // LE-NEXT:    [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
 // LE-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
 // LE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
-// LE-NEXT:    [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 1
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
 // LE-NEXT:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
 // LE-NEXT:    [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
 // LE-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
@@ -436,11 +436,11 @@
 // BE-NEXT:    [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
 // BE-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
 // BE-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
-// BE-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A]], align 4, !tbaa !11
+// BE-NEXT:    [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
 // BE-NEXT:    [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
 // BE-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
 // BE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
-// BE-NEXT:    [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 1
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
 // BE-NEXT:    [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
 // BE-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
 // BE-NEXT:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
@@ -458,12 +458,12 @@
 // LE-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
 // LE-NEXT:    store i8 1, i8* [[X]], align 4, !tbaa !8
 // LE-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
-// LE-NEXT:    store i8 2, i8* [[A]], align 4, !tbaa !11
+// LE-NEXT:    store volatile i8 2, i8* [[A]], align 4, !tbaa !11
 // LE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
-// LE-NEXT:    [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 1
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
 // LE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
 // LE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3
-// LE-NEXT:    store i8 [[BF_SET]], i8* [[B]], align 1
+// LE-NEXT:    store volatile i8 [[BF_SET]], i8* [[B]], align 1
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @st7_check_store(
@@ -471,12 +471,12 @@
 // BE-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
 // BE-NEXT:    store i8 1, i8* [[X]], align 4, !tbaa !8
 // BE-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
-// BE-NEXT:    store i8 2, i8* [[A]], align 4, !tbaa !11
+// BE-NEXT:    store volatile i8 2, i8* [[A]], align 4, !tbaa !11
 // BE-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
-// BE-NEXT:    [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 1
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
 // BE-NEXT:    [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
 // BE-NEXT:    [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24
-// BE-NEXT:    store i8 [[BF_SET]], i8* [[B]], align 1
+// BE-NEXT:    store volatile i8 [[BF_SET]], i8* [[B]], align 1
 // BE-NEXT:    ret void
 //
 void st7_check_store(struct st7b *m) {
@@ -512,17 +512,18 @@
 // LE-LABEL: @read_st9(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
-// LE-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// LE-NEXT:    ret i32 [[BF_CAST]]
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24
+// LE-NEXT:    [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24
+// LE-NEXT:    ret i32 [[BF_ASHR]]
 //
 // BE-LABEL: @read_st9(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
-// BE-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// BE-NEXT:    ret i32 [[BF_CAST]]
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24
+// BE-NEXT:    ret i32 [[BF_ASHR]]
 //
 int read_st9(volatile struct st9 *m) {
   return m->f;
@@ -530,14 +531,20 @@
 // LE-LABEL: @store_st9(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    store volatile i8 1, i8* [[TMP0]], align 4
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @store_st9(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    store volatile i8 1, i8* [[TMP0]], align 4
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT:    ret void
 //
 void store_st9(volatile struct st9 *m) {
@@ -546,18 +553,26 @@
 // LE-LABEL: @increment_st9(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
-// LE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// LE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_VALUE:%.*]] = and i32 [[INC]], 255
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @increment_st9(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
-// BE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
-// BE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 4
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
+// BE-NEXT:    [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT:    ret void
 //
 void increment_st9(volatile struct st9 *m) {
@@ -571,21 +586,19 @@
 // LE-LABEL: @read_st10(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7
-// LE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
-// LE-NEXT:    [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
-// LE-NEXT:    ret i32 [[BF_CAST]]
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23
+// LE-NEXT:    [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// LE-NEXT:    ret i32 [[BF_ASHR]]
 //
 // BE-LABEL: @read_st10(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1
-// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
-// BE-NEXT:    [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
-// BE-NEXT:    ret i32 [[BF_CAST]]
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1
+// BE-NEXT:    [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
+// BE-NEXT:    ret i32 [[BF_ASHR]]
 //
 int read_st10(volatile struct st10 *m) {
   return m->f;
@@ -593,20 +606,20 @@
 // LE-LABEL: @store_st10(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511
-// LE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2
-// LE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @store_st10(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641
-// BE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
-// BE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT:    ret void
 //
 void store_st10(volatile struct st10 *m) {
@@ -615,26 +628,26 @@
 // LE-LABEL: @increment_st10(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// LE-NEXT:    [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2
-// LE-NEXT:    [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510
-// LE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511
-// LE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
-// LE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[INC3:%.*]] = add i32 [[BF_LOAD]], 2
+// LE-NEXT:    [[BF_SHL2:%.*]] = and i32 [[INC3]], 510
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // LE-NEXT:    ret void
 //
 // BE-LABEL: @increment_st10(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
-// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
-// BE-NEXT:    [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128
-// BE-NEXT:    [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640
-// BE-NEXT:    [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641
-// BE-NEXT:    [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
-// BE-NEXT:    store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608
+// BE-NEXT:    [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
 // BE-NEXT:    ret void
 //
 void increment_st10(volatile struct st10 *m) {
@@ -822,3 +835,325 @@
 void increment_e_st12(volatile struct st12 *m) {
   ++m->e;
 }
+
+struct st13 {
+  char a : 8;
+  int b : 32;
+} __attribute__((packed));
+
+// LE-LABEL: @increment_b_st13(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LE-NEXT:    [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
+// LE-NEXT:    [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
+// LE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LE-NEXT:    [[TMP2:%.*]] = zext i32 [[INC]] to i40
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// LE-NEXT:    [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
+// LE-NEXT:    [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
+// LE-NEXT:    store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_b_st13(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BE-NEXT:    [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
+// BE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BE-NEXT:    [[TMP1:%.*]] = zext i32 [[INC]] to i40
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
+// BE-NEXT:    [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
+// BE-NEXT:    store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
+// BE-NEXT:    ret void
+//
+void increment_b_st13(volatile struct st13 *s) {
+  s->b++;
+}
+
+struct st14 {
+  char a : 8;
+} __attribute__((packed));
+
+// LE-LABEL: @increment_a_st14(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_a_st14(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BE-NEXT:    ret void
+//
+void increment_a_st14(volatile struct st14 *s) {
+  s->a++;
+}
+
+struct st15 {
+  short a : 8;
+} __attribute__((packed));
+
+// LE-LABEL: @increment_a_st15(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// LE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// LE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_a_st15(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
+// BE-NEXT:    [[INC:%.*]] = add i8 [[BF_LOAD]], 1
+// BE-NEXT:    store volatile i8 [[INC]], i8* [[TMP0]], align 1
+// BE-NEXT:    ret void
+//
+void increment_a_st15(volatile struct st15 *s) {
+  s->a++;
+}
+
+struct st16 {
+  int a : 32;
+  int b : 16;
+  int c : 32;
+  int d : 16;
+};
+
+// LE-LABEL: @increment_a_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LE-NEXT:    [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LE-NEXT:    [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_a_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BE-NEXT:    [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BE-NEXT:    [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BE-NEXT:    [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BE-NEXT:    [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BE-NEXT:    ret void
+//
+void increment_a_st16(struct st16 *s) {
+  s->a++;
+}
+
+// LE-LABEL: @increment_b_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LE-NEXT:    [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LE-NEXT:    [[INC:%.*]] = add i32 [[TMP2]], 1
+// LE-NEXT:    [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LE-NEXT:    [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LE-NEXT:    [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_b_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BE-NEXT:    [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BE-NEXT:    [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BE-NEXT:    [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BE-NEXT:    [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BE-NEXT:    ret void
+//
+void increment_b_st16(struct st16 *s) {
+  s->b++;
+}
+
+// LE-LABEL: @increment_c_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LE-NEXT:    [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LE-NEXT:    [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// LE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// LE-NEXT:    [[TMP1:%.*]] = zext i32 [[INC]] to i64
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
+// LE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
+// LE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_c_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BE-NEXT:    [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BE-NEXT:    [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// BE-NEXT:    [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
+// BE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
+// BE-NEXT:    [[TMP2:%.*]] = zext i32 [[INC]] to i64
+// BE-NEXT:    [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
+// BE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
+// BE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BE-NEXT:    ret void
+//
+void increment_c_st16(struct st16 *s) {
+  s->c++;
+}
+
+// LE-LABEL: @increment_d_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LE-NEXT:    [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// LE-NEXT:    [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
+// LE-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+// LE-NEXT:    [[INC:%.*]] = add i32 [[TMP2]], 1
+// LE-NEXT:    [[TMP3:%.*]] = and i32 [[INC]], 65535
+// LE-NEXT:    [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
+// LE-NEXT:    [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
+// LE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
+// LE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_d_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BE-NEXT:    [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
+// BE-NEXT:    [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
+// BE-NEXT:    [[INC4:%.*]] = add i32 [[TMP1]], 65536
+// BE-NEXT:    [[TMP2:%.*]] = and i32 [[INC4]], -65536
+// BE-NEXT:    [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
+// BE-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
+// BE-NEXT:    store i64 [[BF_SET]], i64* [[TMP0]], align 4
+// BE-NEXT:    ret void
+//
+void increment_d_st16(struct st16 *s) {
+  s->d++;
+}
+
+// LE-LABEL: @increment_v_a_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// LE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
+// LE-NEXT:    store volatile i32 [[INC]], i32* [[TMP0]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_v_a_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
+// BE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
+// BE-NEXT:    store volatile i32 [[INC]], i32* [[TMP0]], align 4
+// BE-NEXT:    ret void
+//
+void increment_v_a_st16(volatile struct st16 *s) {
+  s->a++;
+}
+
+// LE-LABEL: @increment_v_b_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// LE-NEXT:    [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// LE-NEXT:    [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_v_b_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// BE-NEXT:    [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
+// BE-NEXT:    [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
+// BE-NEXT:    ret void
+//
+void increment_v_b_st16(volatile struct st16 *s) {
+  s->b++;
+}
+
+// LE-LABEL: @increment_v_c_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// LE-NEXT:    [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// LE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
+// LE-NEXT:    store volatile i32 [[INC]], i32* [[TMP1]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_v_c_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
+// BE-NEXT:    [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// BE-NEXT:    [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
+// BE-NEXT:    store volatile i32 [[INC]], i32* [[TMP1]], align 4
+// BE-NEXT:    ret void
+//
+void increment_v_c_st16(volatile struct st16 *s) {
+  s->c++;
+}
+
+// LE-LABEL: @increment_v_d_st16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
+// LE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// LE-NEXT:    [[INC:%.*]] = add i32 [[BF_LOAD]], 1
+// LE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// LE-NEXT:    [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
+// LE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
+// LE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// LE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
+// LE-NEXT:    ret void
+//
+// BE-LABEL: @increment_v_d_st16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
+// BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
+// BE-NEXT:    [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// BE-NEXT:    [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
+// BE-NEXT:    [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
+// BE-NEXT:    [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
+// BE-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
+// BE-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
+// BE-NEXT:    store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
+// BE-NEXT:    ret void
+//
+void increment_v_d_st16(volatile struct st16 *s) {
+  s->d++;
+}
diff --git a/clang/test/CodeGen/bitfield-2.c b/clang/test/CodeGen/bitfield-2.c
--- a/clang/test/CodeGen/bitfield-2.c
+++ b/clang/test/CodeGen/bitfield-2.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -emit-llvm -triple x86_64 -O3 -o %t.opt.ll %s \
 // RUN:   -fdump-record-layouts > %t.dump.txt
 // RUN: FileCheck -check-prefix=CHECK-RECORD < %t.dump.txt %s
@@ -14,7 +15,7 @@
 // CHECK-RECORD: LLVMType:%struct.s0 = type { [3 x i8] }
 // CHECK-RECORD: IsZeroInitializable:1
 // CHECK-RECORD: BitFields:[
-// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:24 IsSigned:1 StorageSize:24 StorageOffset:0>
+// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:24 IsSigned:1 StorageSize:24 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
-// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:10 IsSigned:1 StorageSize:24 StorageOffset:0>
+// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:10 IsSigned:1 StorageSize:24 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
+// CHECK-RECORD:   <CGBitFieldInfo Offset:10 Size:10 IsSigned:1 StorageSize:24 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
-// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:28 IsSigned:0 StorageSize:32 StorageOffset:0>
+// CHECK-RECORD:   <CGBitFieldInfo Offset:0 Size:28 IsSigned:0 StorageSize:32 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
 
 struct __attribute__((packed)) s9 {
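
A minimal standalone reproducer of the behavior exercised above may help reviewers; it is illustrative only and not part of the patch. The struct and function mirror st9/read_st9 from aapcs-bitfield.c, and the compile invocation is an assumption (the test's actual RUN lines are not shown in this diff):

  /* Sketch only: mirrors st9/read_st9 from the test above. */
  struct st9 {
    int f : 8; /* declared type int, so the AAPCS container is 32 bits wide */
  };

  int read_st9(volatile struct st9 *m) {
    /* With this patch, AAPCS targets emit one 32-bit volatile load of the
     * container, then shl/ashr (LE) or ashr (BE) to extract the signed
     * 8-bit field, matching the CHECK lines above. Previously this was a
     * single 8-bit volatile load. Non-AAPCS targets are unchanged. */
    return m->f;
  }

Compiled with something like (flags assumed, not taken from this diff):

  clang --target=arm-none-linux-gnueabi -O1 -S -emit-llvm st9.c

the IR for read_st9 should show the widened `load volatile i32` access required by the AAPCS volatile bit-field rules that computeVolatileBitfields() implements.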