Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -1799,16 +1799,48 @@ return EmitLoadOfBitfieldLValue(LV, Loc); } +void CodeGenFunction::AdjustBitfieldAccess(CGBitFieldInfo &Info, Address &Ptr, const llvm::Type *ResLTy) { + // The bitfield can be really large. Let's try to find the subset that we + // need to load by finding the closest address that preserves the alignment + unsigned LoadOffset = ((Info.Offset/8) / Ptr.getAlignment().getQuantity()) * Ptr.getAlignment().getQuantity(); + + // Patch the bitfield Info to account for the offset we applied. + Info.Offset -= LoadOffset*8; + // The "StorageSize" will be the number of bits actually loaded + // We default here to the minimum number of bits rounded up to next multiple + // of a 8 (byte size) + unsigned LoadSize = llvm::alignTo(Info.Size + Info.Offset, 8); + // If the size of the resulting type is larger than the LoadSize, then we + // extend the load now, it'll simplify the codegen. 
+ unsigned RetSize = llvm::alignTo(ResLTy->getScalarSizeInBits(), 8); + if (RetSize > LoadSize && LoadOffset*8 + RetSize <= Info.StorageSize) + LoadSize = RetSize; + Info.StorageSize = LoadSize; + + // Generate the pointer to access the data, applying possible offset and bitcast + llvm::Value *Addr = Ptr.getPointer(); + if (LoadOffset) { + Addr = Builder.CreatePointerCast(Addr, llvm::PointerType::getUnqual(Builder.getInt8Ty())); + auto *Offset = llvm::ConstantInt::get(IntPtrTy, LoadOffset); + Addr = Builder.CreateGEP(Addr, Offset, "bf.elt_offset"); + } + if (Info.StorageSize != Addr->getType()->getScalarSizeInBits()) + Addr = Builder.CreatePointerCast(Addr, llvm::PointerType::getUnqual(Builder.getIntNTy(Info.StorageSize))); + Ptr = Address(Addr, Ptr.getAlignment()); +} + + RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc) { - const CGBitFieldInfo &Info = LV.getBitFieldInfo(); + CGBitFieldInfo Info = LV.getBitFieldInfo(); // Get the output type. llvm::Type *ResLTy = ConvertType(LV.getType()); - Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + AdjustBitfieldAccess(Info, Ptr, ResLTy); + + llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); if (Info.IsSigned) { assert(static_cast<unsigned>(Info.Offset + Info.Size) <= Info.StorageSize); unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; @@ -1819,8 +1851,8 @@ } else { if (Info.Offset) Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); - if (static_cast<unsigned>(Info.Offset) + Info.Size < Info.StorageSize) - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, + if (static_cast<unsigned>(Info.Offset) + Info.Size < Val->getType()->getScalarSizeInBits()) + Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Val->getType()->getScalarSizeInBits(), Info.Size), "bf.clear"); } @@ -2006,10 +2038,12 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, 
llvm::Value **Result) { - const CGBitFieldInfo &Info = Dst.getBitFieldInfo(); + CGBitFieldInfo Info = Dst.getBitFieldInfo(); llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType()); Address Ptr = Dst.getBitFieldAddress(); + AdjustBitfieldAccess(Info, Ptr, ResLTy); + // Get the source value, truncated to the width of the bit-field. llvm::Value *SrcVal = Src.getScalarVal(); @@ -2022,7 +2056,7 @@ // and mask together with source before storing. if (Info.StorageSize != Info.Size) { assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); - llvm::Value *Val = + llvm::Value * Val = Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3430,6 +3430,12 @@ /// If so, atomic qualifiers are ignored and the store is always non-atomic. void EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit=false); + /// Adjust accesses to bitfields by finding an offset in the storage that will + /// generate a smaller load to extract only the desired field. + /// The bitfield Info and the base Ptr are modified accordingly, ResLTy is the + /// type of the returned value so that the load is widened as appropriate. + void AdjustBitfieldAccess(CGBitFieldInfo &Info, Address &Ptr, const llvm::Type *ResLTy); + /// EmitLoadOfLValue - Given an expression that represents a value lvalue, /// this method emits the address of the lvalue, then loads the result as an /// rvalue, returning the rvalue. 
Index: clang/test/CodeGen/arm-bitfield-alignment.c =================================================================== --- clang/test/CodeGen/arm-bitfield-alignment.c +++ clang/test/CodeGen/arm-bitfield-alignment.c @@ -12,4 +12,4 @@ } // CHECK: @g = external global %struct.T, align 4 -// CHECK: %{{.*}} = load i64, i64* bitcast (%struct.T* @g to i64*), align 4 +// CHECK: %{{.*}} = load i32, i32* bitcast (%struct.T* @g to i32*), align 4 Index: clang/test/CodeGen/no-bitfield-type-align.c =================================================================== --- clang/test/CodeGen/no-bitfield-type-align.c +++ clang/test/CodeGen/no-bitfield-type-align.c @@ -10,33 +10,41 @@ }; // CHECK: define void @test_zero_width_bitfield(%[[STRUCT_S]]* %[[A:.*]]) -// CHECK: %[[BF_LOAD:.*]] = load i32, i32* %[[V1:.*]], align 1 -// CHECK: %[[BF_CLEAR:.*]] = and i32 %[[BF_LOAD]], 32767 -// CHECK: %[[BF_CAST:.*]] = trunc i32 %[[BF_CLEAR]] to i16 -// CHECK: %[[CONV:.*]] = zext i16 %[[BF_CAST]] to i32 +// CHECK: %[[V1:.*]] = bitcast %struct.S* %0 to i32* +// CHECK: %[[V1_I16:.*]] = bitcast i32* %[[V1]] to i16* +// CHECK: %[[BF_LOAD:.*]] = load i16, i16* %[[V1_I16:.*]], align 1 +// CHECK: %[[BF_CLEAR:.*]] = and i16 %[[BF_LOAD]], 32767 +// CHECK: %[[CONV:.*]] = zext i16 %[[BF_CLEAR]] to i32 // CHECK: %[[ADD:.*]] = add nsw i32 %[[CONV]], 1 // CHECK: %[[CONV1:.*]] = trunc i32 %[[ADD]] to i16 -// CHECK: %[[V2:.*]] = zext i16 %[[CONV1]] to i32 -// CHECK: %[[BF_LOAD2:.*]] = load i32, i32* %[[V1]], align 1 -// CHECK: %[[BF_VALUE:.*]] = and i32 %[[V2]], 32767 -// CHECK: %[[BF_CLEAR3:.*]] = and i32 %[[BF_LOAD2]], -32768 -// CHECK: %[[BF_SET:.*]] = or i32 %[[BF_CLEAR3]], %[[BF_VALUE]] -// CHECK: store i32 %[[BF_SET]], i32* %[[V1]], align 1 +// CHECK: %[[V1_I16:.*]] = bitcast i32* %[[V1]] to i16* +// CHECK: %[[BF_LOAD2:.*]] = load i16, i16* %[[V1_I16]], align 1 +// CHECK: %[[BF_VALUE:.*]] = and i16 %[[CONV1]], 32767 +// CHECK: %[[BF_CLEAR3:.*]] = and i16 %[[BF_LOAD2]], -32768 +// CHECK: %[[BF_SET:.*]] = 
or i16 %[[BF_CLEAR3]], %[[BF_VALUE]] +// CHECK: store i16 %[[BF_SET]], i16* %[[V1_I16]], align 1 -// CHECK: %[[BF_LOAD4:.*]] = load i32, i32* %[[V4:.*]], align 1 -// CHECK: %[[BF_LSHR:.*]] = lshr i32 %[[BF_LOAD4]], 15 -// CHECK: %[[BF_CLEAR5:.*]] = and i32 %[[BF_LSHR]], 32767 -// CHECK: %[[BF_CAST6:.*]] = trunc i32 %[[BF_CLEAR5]] to i16 -// CHECK: %[[CONV7:.*]] = zext i16 %[[BF_CAST6]] to i32 +// CHECK: %[[BF_S_I32:.*]] = bitcast %struct.S* {{.*}} to i32* +// CHECK: %[[BF_S_I8:.*]] = bitcast i32* %[[BF_S_I32]] to i8* +// CHECK: %[[BF_ELT_PTR:.*]] = getelementptr i8, i8* %[[BF_S_I8]], i64 1 +// CHECK: %[[BF_ELT_PTR_CAST:.*]] = bitcast i8* %[[BF_ELT_PTR]] to i24* +// CHECK: %[[BF_LOAD4:.*]] = load i24, i24* %[[BF_ELT_PTR_CAST:.*]], align 1 +// CHECK: %[[BF_LSHR:.*]] = lshr i24 %[[BF_LOAD4]], 7 +// CHECK: %[[BF_AND:.*]] = and i24 %[[BF_LSHR]], 32767 +// CHECK: %[[BF_TRUNC:.*]] = trunc i24 %[[BF_AND]] to i16 +// CHECK: %[[CONV7:.*]] = zext i16 %[[BF_TRUNC]] to i32 // CHECK: %[[ADD8:.*]] = add nsw i32 %[[CONV7]], 2 // CHECK: %[[CONV9:.*]] = trunc i32 %[[ADD8]] to i16 -// CHECK: %[[V5:.*]] = zext i16 %[[CONV9]] to i32 -// CHECK: %[[BF_LOAD10:.*]] = load i32, i32* %[[V4]], align 1 -// CHECK: %[[BF_VALUE11:.*]] = and i32 %[[V5]], 32767 -// CHECK: %[[BF_SHL:.*]] = shl i32 %[[BF_VALUE11]], 15 -// CHECK: %[[BF_CLEAR12:.*]] = and i32 %[[BF_LOAD10]], -1073709057 -// CHECK: %[[BF_SET13:.*]] = or i32 %[[BF_CLEAR12]], %[[BF_SHL]] -// CHECK: store i32 %[[BF_SET13]], i32* %[[V4]], align 1 +// CHECK: %[[BF_S_I8:.*]] = bitcast i32* %[[BF_S_I32]] to i8* +// CHECK: %[[BF_ELT_PTR:.*]] = getelementptr i8, i8* %[[BF_S_I8]], i64 1 +// CHECK: %[[BF_ELT_PTR_CAST:.*]] = bitcast i8* %[[BF_ELT_PTR]] to i24* +// CHECK: %[[CONV9_I24:.*]] = zext i16 %[[CONV9]] to i24 +// CHECK: %[[BF_LOAD10:.*]] = load i24, i24* %[[BF_ELT_PTR_CAST:.*]], align 1 +// CHECK: %[[BF_VALUE11:.*]] = and i24 %[[CONV9_I24]], 32767 +// CHECK: %[[BF_SHL:.*]] = shl i24 %[[BF_VALUE11]], 7 +// CHECK: %[[BF_CLEAR12:.*]] = and i24 
%[[BF_LOAD10]], -4194177 +// CHECK: %[[BF_SET13:.*]] = or i24 %[[BF_CLEAR12]], %[[BF_SHL]] +// CHECK: store i24 %[[BF_SET13]], i24* %[[BF_ELT_PTR_CAST]], align 1 void test_zero_width_bitfield(struct S *a) { a->f1 += 1; Index: clang/test/CodeGen/packed-nest-unpacked.c =================================================================== --- clang/test/CodeGen/packed-nest-unpacked.c +++ clang/test/CodeGen/packed-nest-unpacked.c @@ -8,40 +8,40 @@ // struct X test1() { - // CHECK: @test1 + // CHECK-LABEL: @test1 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) return g.y; } struct X test2() { - // CHECK: @test2 + // CHECK-LABEL: @test2 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) struct X a = g.y; return a; } void test3(struct X a) { - // CHECK: @test3 + // CHECK-LABEL: @test3 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i1 false) g.y = a; } // void test4() { - // CHECK: @test4 + // CHECK-LABEL: @test4 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) f(g.y); } // PR12395 int test5() { - // CHECK: @test5 + // CHECK-LABEL: @test5 // CHECK: load i32, i32* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1, i32 0, i64 0), align 1 return g.y.x[0]; } // void test6() { - // CHECK: @test6 + // CHECK-LABEL: @test6 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* align 4 %{{.*}}, i64 24, i1 false) g.y = foo(); } @@ -59,15 +59,15 @@ struct 
YBitfield gbitfield; unsigned test7() { - // CHECK: @test7 - // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 + // CHECK-LABEL: @test7 + // CHECK: load i16, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 return gbitfield.y.b2; } void test8(unsigned x) { - // CHECK: @test8 - // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 - // CHECK: store i32 {{.*}}, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 + // CHECK-LABEL: @test8 + // CHECK: load i16, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 + // CHECK: store i16 %bf.set, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 gbitfield.y.b2 = x; } @@ -80,13 +80,13 @@ struct TBitfield tbitfield; unsigned test9() { - // CHECK: @test9 + // CHECK-LABEL: @test9 // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 return tbitfield.c; } void test10(unsigned x) { - // CHECK: @test10 + // CHECK-LABEL: @test10 // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 // CHECK: store i16 {{.*}}, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 tbitfield.c = x; Index: clang/test/CodeGenCXX/bitfield.cpp =================================================================== --- clang/test/CodeGenCXX/bitfield.cpp +++ 
clang/test/CodeGenCXX/bitfield.cpp @@ -21,134 +21,149 @@ }; unsigned read00(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read00 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[and:.*]] = and i64 %[[val]], 16383 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 16383 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read00 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 50 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i16* + // CHECK-PPC64: %[[val:.*]] = load i16, i16* %[[ptr_cast]] + // CHECK-PPC64: %[[shr:.*]] = lshr i16 %[[val]], 2 + // CHECK-PPC64: %[[ext:.*]] = zext i16 %[[shr]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b00; } unsigned read01(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read01 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 14 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // 
CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[shr:.*]] = lshr i32 %[[val]], 6 + // CHECK-X86-64: %[[and:.*]] = and i32 %[[shr]], 3 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read01 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 48 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[and:.*]] = and i8 %[[val]], 3 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b01; } unsigned read20(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read20 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 16 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 2 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 63 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read20 - // CHECK-PPC64: 
%[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 42 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 5 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[lshr:.*]] = lshr i8 %[[val]], 2 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[lshr]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b20; } unsigned read21(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read21 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 22 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 2 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[shr:.*]] = lshr i32 %[[val]], 6 + // CHECK-X86-64: %[[and:.*]] = and i32 %[[shr]], 3 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read21 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 40 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to 
i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 5 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[and:.*]] = and i8 %[[val]], 3 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b21; } unsigned read30(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read30 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 24 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 1073741823 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 3 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 1073741823 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read30 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 10 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 1073741823 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 2 + // CHECK-PPC64: ret i32 %[[shr]] return s->b30; } unsigned 
read31(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read31 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 54 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[shr:.*]] = lshr i8 %[[val]], 6 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[shr]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read31 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 8 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-PPC64: %[[and:.*]] = and i32 %[[val]], 3 + // CHECK-PPC64: ret i32 %[[and]] return s->b31; } unsigned read70(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read70 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 56 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - 
// CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 7 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[and:.*]] = and i8 %[[val]], 63 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read70 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 2 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 2 + // CHECK-PPC64: %[[and:.*]] = and i32 %[[shr]], 63 + // CHECK-PPC64: ret i32 %[[and]] return s->b70; } unsigned read71(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read71 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 62 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 7 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[shr:.*]] = lshr i8 %[[val]], 6 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[shr]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read71 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - 
// CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[and:.*]] = and i64 %[[val]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-PPC64: %[[and:.*]] = and i32 %[[val]], 3 + // CHECK-PPC64: ret i32 %[[and]] return s->b71; } } Index: clang/test/OpenMP/atomic_capture_codegen.cpp =================================================================== --- clang/test/OpenMP/atomic_capture_codegen.cpp +++ clang/test/OpenMP/atomic_capture_codegen.cpp @@ -822,20 +822,20 @@ // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP1:%.+]], // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP:%.+]], -// CHECK: [[A_LD:%.+]] = load i64, i64* [[TEMP]], -// CHECK: [[A_SHL:%.+]] = shl i64 [[A_LD]], 47 -// CHECK: [[A_ASHR:%.+]] = ashr i64 [[A_SHL:%.+]], 63 -// CHECK: [[A_CAST:%.+]] = trunc i64 [[A_ASHR:%.+]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST:%.+]] to x86_fp80 +// CHECK: [[TEMP_CAST:%.+]] = bitcast i64* [[TEMP]] to i32* +// CHECK: [[A_LD:%.+]] = load i32, i32* [[TEMP_CAST]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 15 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL:%.+]], 31 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR:%.+]] to x86_fp80 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[MUL]] to i32 -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_LD:%.+]] = load i64, i64* [[TEMP1]], -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 [[BF_LD]], -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[TEMP1]] +// CHECK: [[TEMP1_I32:%.+]] = bitcast i64* [[TEMP1]] to i32 +// 
CHECK: [[BF_LD:%.+]] = load i32, i32* [[TEMP1_I32]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[TEMP1_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 Index: clang/test/OpenMP/atomic_read_codegen.c =================================================================== --- clang/test/OpenMP/atomic_read_codegen.c +++ clang/test/OpenMP/atomic_read_codegen.c @@ -289,11 +289,11 @@ #pragma omp atomic read ldv = bfx3_packed.a; // CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic -// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]] -// CHECK: [[LD:%.+]] = load i64, i64* [[LDTEMP]] -// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47 -// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 63 -// CHECK: trunc i64 [[ASHR]] to i32 +// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]], align +// CHECK: [[LDTEMP_CAST:%.+]] = bitcast i64* [[LDTEMP]] to i32* +// CHECK: [[LD:%.+]] = load i32, i32* [[LDTEMP_CAST]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 15 +// CHECK: [[ASHR:%.+]] = ashr i32 [[SHL]], 31 // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx4.a; Index: clang/test/OpenMP/atomic_update_codegen.cpp =================================================================== --- clang/test/OpenMP/atomic_update_codegen.cpp +++ clang/test/OpenMP/atomic_update_codegen.cpp @@ -758,20 +758,20 @@ // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP1:%.+]], // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP:%.+]], -// CHECK: [[A_LD:%.+]] = load 
i64, i64* [[TEMP]], -// CHECK: [[A_SHL:%.+]] = shl i64 [[A_LD]], 47 -// CHECK: [[A_ASHR:%.+]] = ashr i64 [[A_SHL:%.+]], 63 -// CHECK: [[A_CAST:%.+]] = trunc i64 [[A_ASHR:%.+]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST:%.+]] to x86_fp80 +// CHECK: [[TEMP_CAST:%.+]] = bitcast i64* [[TEMP]] to i32* +// CHECK: [[A_LD:%.+]] = load i32, i32* [[TEMP_CAST]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 15 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 31 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[MUL]] to i32 -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_LD:%.+]] = load i64, i64* [[TEMP1]], -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 [[BF_LD]], -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[TEMP1]] +// CHECK: [[TEMP1_I32:%.+]] = bitcast i64* [[TEMP1]] to i32 +// CHECK: [[BF_LD:%.+]] = load i32, i32* [[TEMP1_I32]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[TEMP1_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 Index: clang/test/OpenMP/atomic_write_codegen.c =================================================================== --- clang/test/OpenMP/atomic_write_codegen.c +++ clang/test/OpenMP/atomic_write_codegen.c @@ -416,12 +416,12 @@ // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ] -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]] +// CHECK: [[LDTEMP_I32:%.+]] = bitcast i64* [[LDTEMP:%.+]] to i32* +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[LDTEMP]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0