Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -1799,16 +1799,48 @@ return EmitLoadOfBitfieldLValue(LV, Loc); } +void CodeGenFunction::AdjustBitfieldAccess(CGBitFieldInfo &Info, Address &Ptr, const llvm::Type *ResLTy) { + // The bitfield can be really large. Let's try to find the subset that we + // need to load by finding the closest address that preserves the alignment + unsigned LoadOffset = ((Info.Offset/8) / Ptr.getAlignment().getQuantity()) * Ptr.getAlignment().getQuantity(); + + // Patch the bitfield Info to account for the offset we applied. + Info.Offset -= LoadOffset*8; + // The "StorageSize" will be the number of bits actually loaded + // We default here to the minimum number of bits rounded up to next multiple + // of a 8 (byte size) + unsigned LoadSize = llvm::alignTo(Info.Size + Info.Offset, 8); + // If the size of the resulting type is larger than the LoadSize, then we + // extend the load now, it'll simplify the codegen. 
+ unsigned RetSize = llvm::alignTo(ResLTy->getScalarSizeInBits(), 8); + if (RetSize > LoadSize && LoadOffset*8 + RetSize <= Info.StorageSize) + LoadSize = RetSize; + Info.StorageSize = LoadSize; + + // Generate the pointer to access the data, applying possible offset and bitcast + llvm::Value *Addr = Ptr.getPointer(); + if (LoadOffset) { + Addr = Builder.CreatePointerCast(Addr, llvm::PointerType::getUnqual(Builder.getInt8Ty())); + auto *Offset = llvm::ConstantInt::get(IntPtrTy, LoadOffset); + Addr = Builder.CreateGEP(Addr, Offset, "bf.elt_offset"); + } + if (Info.StorageSize != Addr->getType()->getScalarSizeInBits()) + Addr = Builder.CreatePointerCast(Addr, llvm::PointerType::getUnqual(Builder.getIntNTy(Info.StorageSize))); + Ptr = Address(Addr, Ptr.getAlignment()); +} + + RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc) { - const CGBitFieldInfo &Info = LV.getBitFieldInfo(); + CGBitFieldInfo Info = LV.getBitFieldInfo(); // Get the output type. llvm::Type *ResLTy = ConvertType(LV.getType()); - Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + AdjustBitfieldAccess(Info, Ptr, ResLTy); + + llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); if (Info.IsSigned) { assert(static_cast<unsigned>(Info.Offset + Info.Size) <= Info.StorageSize); unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; @@ -1819,8 +1851,8 @@ } else { if (Info.Offset) Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); - if (static_cast<unsigned>(Info.Offset) + Info.Size < Info.StorageSize) - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, + if (static_cast<unsigned>(Info.Offset) + Info.Size < Val->getType()->getScalarSizeInBits()) + Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Val->getType()->getScalarSizeInBits(), Info.Size), "bf.clear"); } @@ -2006,10 +2038,12 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, 
llvm::Value **Result) { - const CGBitFieldInfo &Info = Dst.getBitFieldInfo(); + CGBitFieldInfo Info = Dst.getBitFieldInfo(); llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType()); Address Ptr = Dst.getBitFieldAddress(); + AdjustBitfieldAccess(Info, Ptr, ResLTy); + // Get the source value, truncated to the width of the bit-field. llvm::Value *SrcVal = Src.getScalarVal(); @@ -2022,7 +2056,7 @@ // and mask together with source before storing. if (Info.StorageSize != Info.Size) { assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); - llvm::Value *Val = + llvm::Value * Val = Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3430,6 +3430,12 @@ /// If so, atomic qualifiers are ignored and the store is always non-atomic. void EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit=false); + /// Adjust accesses to bitfields by finding an offset in the storage that will + /// generate a smaller load to extract only the desired field. + /// The bitfield Info and the base Ptr are modified accordingly, ResLTy is the + /// type of the returned value so that the load is widened as appropriate. + void AdjustBitfieldAccess(CGBitFieldInfo &Info, Address &Ptr, const llvm::Type *ResLTy); + /// EmitLoadOfLValue - Given an expression that represents a value lvalue, /// this method emits the address of the lvalue, then loads the result as an /// rvalue, returning the rvalue. 
Index: clang/test/CodeGen/arm-bitfield-alignment.c =================================================================== --- clang/test/CodeGen/arm-bitfield-alignment.c +++ clang/test/CodeGen/arm-bitfield-alignment.c @@ -12,4 +12,4 @@ } // CHECK: @g = external global %struct.T, align 4 -// CHECK: %{{.*}} = load i64, i64* bitcast (%struct.T* @g to i64*), align 4 +// CHECK: %{{.*}} = load i32, i32* bitcast (%struct.T* @g to i32*), align 4 Index: clang/test/CodeGen/no-bitfield-type-align.c =================================================================== --- clang/test/CodeGen/no-bitfield-type-align.c +++ clang/test/CodeGen/no-bitfield-type-align.c @@ -10,33 +10,41 @@ }; // CHECK: define void @test_zero_width_bitfield(%[[STRUCT_S]]* %[[A:.*]]) -// CHECK: %[[BF_LOAD:.*]] = load i32, i32* %[[V1:.*]], align 1 -// CHECK: %[[BF_CLEAR:.*]] = and i32 %[[BF_LOAD]], 32767 -// CHECK: %[[BF_CAST:.*]] = trunc i32 %[[BF_CLEAR]] to i16 -// CHECK: %[[CONV:.*]] = zext i16 %[[BF_CAST]] to i32 +// CHECK: %[[V1:.*]] = bitcast %struct.S* %0 to i32* +// CHECK: %[[V1_I16:.*]] = bitcast i32* %[[V1]] to i16* +// CHECK: %[[BF_LOAD:.*]] = load i16, i16* %[[V1_I16:.*]], align 1 +// CHECK: %[[BF_CLEAR:.*]] = and i16 %[[BF_LOAD]], 32767 +// CHECK: %[[CONV:.*]] = zext i16 %[[BF_CLEAR]] to i32 // CHECK: %[[ADD:.*]] = add nsw i32 %[[CONV]], 1 // CHECK: %[[CONV1:.*]] = trunc i32 %[[ADD]] to i16 -// CHECK: %[[V2:.*]] = zext i16 %[[CONV1]] to i32 -// CHECK: %[[BF_LOAD2:.*]] = load i32, i32* %[[V1]], align 1 -// CHECK: %[[BF_VALUE:.*]] = and i32 %[[V2]], 32767 -// CHECK: %[[BF_CLEAR3:.*]] = and i32 %[[BF_LOAD2]], -32768 -// CHECK: %[[BF_SET:.*]] = or i32 %[[BF_CLEAR3]], %[[BF_VALUE]] -// CHECK: store i32 %[[BF_SET]], i32* %[[V1]], align 1 +// CHECK: %[[V1_I16:.*]] = bitcast i32* %[[V1]] to i16* +// CHECK: %[[BF_LOAD2:.*]] = load i16, i16* %[[V1_I16]], align 1 +// CHECK: %[[BF_VALUE:.*]] = and i16 %[[CONV1]], 32767 +// CHECK: %[[BF_CLEAR3:.*]] = and i16 %[[BF_LOAD2]], -32768 +// CHECK: %[[BF_SET:.*]] = 
or i16 %[[BF_CLEAR3]], %[[BF_VALUE]] +// CHECK: store i16 %[[BF_SET]], i16* %[[V1_I16]], align 1 -// CHECK: %[[BF_LOAD4:.*]] = load i32, i32* %[[V4:.*]], align 1 -// CHECK: %[[BF_LSHR:.*]] = lshr i32 %[[BF_LOAD4]], 15 -// CHECK: %[[BF_CLEAR5:.*]] = and i32 %[[BF_LSHR]], 32767 -// CHECK: %[[BF_CAST6:.*]] = trunc i32 %[[BF_CLEAR5]] to i16 -// CHECK: %[[CONV7:.*]] = zext i16 %[[BF_CAST6]] to i32 +// CHECK: %[[BF_S_I32:.*]] = bitcast %struct.S* {{.*}} to i32* +// CHECK: %[[BF_S_I8:.*]] = bitcast i32* %[[BF_S_I32]] to i8* +// CHECK: %[[BF_ELT_PTR:.*]] = getelementptr i8, i8* %[[BF_S_I8]], i64 1 +// CHECK: %[[BF_ELT_PTR_CAST:.*]] = bitcast i8* %[[BF_ELT_PTR]] to i24* +// CHECK: %[[BF_LOAD4:.*]] = load i24, i24* %[[BF_ELT_PTR_CAST:.*]], align 1 +// CHECK: %[[BF_LSHR:.*]] = lshr i24 %[[BF_LOAD4]], 7 +// CHECK: %[[BF_AND:.*]] = and i24 %[[BF_LSHR]], 32767 +// CHECK: %[[BF_TRUNC:.*]] = trunc i24 %[[BF_AND]] to i16 +// CHECK: %[[CONV7:.*]] = zext i16 %[[BF_TRUNC]] to i32 // CHECK: %[[ADD8:.*]] = add nsw i32 %[[CONV7]], 2 // CHECK: %[[CONV9:.*]] = trunc i32 %[[ADD8]] to i16 -// CHECK: %[[V5:.*]] = zext i16 %[[CONV9]] to i32 -// CHECK: %[[BF_LOAD10:.*]] = load i32, i32* %[[V4]], align 1 -// CHECK: %[[BF_VALUE11:.*]] = and i32 %[[V5]], 32767 -// CHECK: %[[BF_SHL:.*]] = shl i32 %[[BF_VALUE11]], 15 -// CHECK: %[[BF_CLEAR12:.*]] = and i32 %[[BF_LOAD10]], -1073709057 -// CHECK: %[[BF_SET13:.*]] = or i32 %[[BF_CLEAR12]], %[[BF_SHL]] -// CHECK: store i32 %[[BF_SET13]], i32* %[[V4]], align 1 +// CHECK: %[[BF_S_I8:.*]] = bitcast i32* %[[BF_S_I32]] to i8* +// CHECK: %[[BF_ELT_PTR:.*]] = getelementptr i8, i8* %[[BF_S_I8]], i64 1 +// CHECK: %[[BF_ELT_PTR_CAST:.*]] = bitcast i8* %[[BF_ELT_PTR]] to i24* +// CHECK: %[[CONV9_I24:.*]] = zext i16 %[[CONV9]] to i24 +// CHECK: %[[BF_LOAD10:.*]] = load i24, i24* %[[BF_ELT_PTR_CAST:.*]], align 1 +// CHECK: %[[BF_VALUE11:.*]] = and i24 %[[CONV9_I24]], 32767 +// CHECK: %[[BF_SHL:.*]] = shl i24 %[[BF_VALUE11]], 7 +// CHECK: %[[BF_CLEAR12:.*]] = and i24 
%[[BF_LOAD10]], -4194177 +// CHECK: %[[BF_SET13:.*]] = or i24 %[[BF_CLEAR12]], %[[BF_SHL]] +// CHECK: store i24 %[[BF_SET13]], i24* %[[BF_ELT_PTR_CAST]], align 1 void test_zero_width_bitfield(struct S *a) { a->f1 += 1; Index: clang/test/CodeGen/packed-nest-unpacked.c =================================================================== --- clang/test/CodeGen/packed-nest-unpacked.c +++ clang/test/CodeGen/packed-nest-unpacked.c @@ -8,40 +8,40 @@ // struct X test1() { - // CHECK: @test1 + // CHECK-LABEL: @test1 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) return g.y; } struct X test2() { - // CHECK: @test2 + // CHECK-LABEL: @test2 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) struct X a = g.y; return a; } void test3(struct X a) { - // CHECK: @test3 + // CHECK-LABEL: @test3 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i1 false) g.y = a; } // void test4() { - // CHECK: @test4 + // CHECK-LABEL: @test4 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) f(g.y); } // PR12395 int test5() { - // CHECK: @test5 + // CHECK-LABEL: @test5 // CHECK: load i32, i32* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1, i32 0, i64 0), align 1 return g.y.x[0]; } // void test6() { - // CHECK: @test6 + // CHECK-LABEL: @test6 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* align 4 %{{.*}}, i64 24, i1 false) g.y = foo(); } @@ -59,15 +59,15 @@ struct 
YBitfield gbitfield; unsigned test7() { - // CHECK: @test7 - // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 + // CHECK-LABEL: @test7 + // CHECK: load i16, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 return gbitfield.y.b2; } void test8(unsigned x) { - // CHECK: @test8 - // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 - // CHECK: store i32 {{.*}}, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1 + // CHECK-LABEL: @test8 + // CHECK: load i16, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 + // CHECK: store i16 %bf.set, i16* bitcast (i8* getelementptr (i8, i8* bitcast (i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0) to i8*), i64 1) to i16*), align 1 gbitfield.y.b2 = x; } @@ -80,13 +80,13 @@ struct TBitfield tbitfield; unsigned test9() { - // CHECK: @test9 + // CHECK-LABEL: @test9 // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 return tbitfield.c; } void test10(unsigned x) { - // CHECK: @test10 + // CHECK-LABEL: @test10 // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 // CHECK: store i16 {{.*}}, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1 tbitfield.c = x; Index: clang/test/CodeGenCXX/bitfield.cpp =================================================================== --- clang/test/CodeGenCXX/bitfield.cpp +++ 
clang/test/CodeGenCXX/bitfield.cpp @@ -21,134 +21,149 @@ }; unsigned read00(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read00 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[and:.*]] = and i64 %[[val]], 16383 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 16383 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read00 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 50 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i16* + // CHECK-PPC64: %[[val:.*]] = load i16, i16* %[[ptr_cast]] + // CHECK-PPC64: %[[shr:.*]] = lshr i16 %[[val]], 2 + // CHECK-PPC64: %[[ext:.*]] = zext i16 %[[shr]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b00; } unsigned read01(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read01 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 14 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // 
CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[shr:.*]] = lshr i32 %[[val]], 6 + // CHECK-X86-64: %[[and:.*]] = and i32 %[[shr]], 3 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read01 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 48 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[and:.*]] = and i8 %[[val]], 3 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b01; } unsigned read20(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read20 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 16 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 2 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 63 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read20 - // CHECK-PPC64: 
%[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 42 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 5 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[lshr:.*]] = lshr i8 %[[val]], 2 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[lshr]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b20; } unsigned read21(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read21 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 22 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 2 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[shr:.*]] = lshr i32 %[[val]], 6 + // CHECK-X86-64: %[[and:.*]] = and i32 %[[shr]], 3 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read21 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 40 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to 
i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 5 + // CHECK-PPC64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-PPC64: %[[and:.*]] = and i8 %[[val]], 3 + // CHECK-PPC64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-PPC64: ret i32 %[[ext]] return s->b21; } unsigned read30(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read30 // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 24 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 1073741823 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 3 + // CHECK-X86-64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-X86-64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 1073741823 + // CHECK-X86-64: ret i32 %[[and]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read30 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 10 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 1073741823 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 2 + // CHECK-PPC64: ret i32 %[[shr]] return s->b30; } unsigned 
read31(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read31 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 54 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 6 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[shr:.*]] = lshr i8 %[[val]], 6 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[shr]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read31 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 8 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-PPC64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-PPC64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 1 + // CHECK-PPC64: %[[ptr_cast:.*]] = bitcast i8* %[[ptr_offset]] to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr_cast]] + // CHECK-PPC64: %[[and:.*]] = and i32 %[[val]], 3 + // CHECK-PPC64: ret i32 %[[and]] return s->b31; } unsigned read70(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read70 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 56 - // CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - 
// CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 7 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[and:.*]] = and i8 %[[val]], 63 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[and]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read70 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 2 - // CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 2 + // CHECK-PPC64: %[[and:.*]] = and i32 %[[shr]], 63 + // CHECK-PPC64: ret i32 %[[and]] return s->b70; } unsigned read71(S* s) { // CHECK-X86-64-LABEL: define i32 @_ZN2N06read71 - // CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - // CHECK-X86-64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 62 - // CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32 - // CHECK-X86-64: ret i32 %[[trunc]] + // CHECK-X86-64: %[[ptr1:.*]] = bitcast %{{.*}}* %{{.*}} to i64* + // CHECK-X86-64: %[[ptr:.*]] = bitcast i64* %[[ptr1]] to i8* + // CHECK-X86-64: %[[ptr_offset:.*]] = getelementptr i8, i8* %[[ptr]], i64 7 + // CHECK-X86-64: %[[val:.*]] = load i8, i8* %[[ptr_offset]] + // CHECK-X86-64: %[[shr:.*]] = lshr i8 %[[val]], 6 + // CHECK-X86-64: %[[ext:.*]] = zext i8 %[[shr]] to i32 + // CHECK-X86-64: ret i32 %[[ext]] // CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read71 - // CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64* - 
// CHECK-PPC64: %[[val:.*]] = load i64, i64* %[[ptr]] - // CHECK-PPC64: %[[and:.*]] = and i64 %[[val]], 3 - // CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32 - // CHECK-PPC64: ret i32 %[[trunc]] + // CHECK-PPC64: %[[ptr:.*]] = bitcast {{.*}}* %{{.*}} to i32* + // CHECK-PPC64: %[[val:.*]] = load i32, i32* %[[ptr]] + // CHECK-PPC64: %[[and:.*]] = and i32 %[[val]], 3 + // CHECK-PPC64: ret i32 %[[and]] return s->b71; } } Index: clang/test/OpenMP/atomic_capture_codegen.cpp =================================================================== --- clang/test/OpenMP/atomic_capture_codegen.cpp +++ clang/test/OpenMP/atomic_capture_codegen.cpp @@ -822,20 +822,20 @@ // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP1:%.+]], // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP:%.+]], -// CHECK: [[A_LD:%.+]] = load i64, i64* [[TEMP]], -// CHECK: [[A_SHL:%.+]] = shl i64 [[A_LD]], 47 -// CHECK: [[A_ASHR:%.+]] = ashr i64 [[A_SHL:%.+]], 63 -// CHECK: [[A_CAST:%.+]] = trunc i64 [[A_ASHR:%.+]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST:%.+]] to x86_fp80 +// CHECK: [[TEMP_CAST:%.+]] = bitcast i64* [[TEMP]] to i32* +// CHECK: [[A_LD:%.+]] = load i32, i32* [[TEMP_CAST]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 15 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL:%.+]], 31 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR:%.+]] to x86_fp80 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[MUL]] to i32 -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_LD:%.+]] = load i64, i64* [[TEMP1]], -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 [[BF_LD]], -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[TEMP1]] +// CHECK: [[TEMP1_I32:%.+]] = bitcast i64* [[TEMP1]] to i32 +// 
CHECK: [[BF_LD:%.+]] = load i32, i32* [[TEMP1_I32]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[TEMP1_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 Index: clang/test/OpenMP/atomic_read_codegen.c =================================================================== --- clang/test/OpenMP/atomic_read_codegen.c +++ clang/test/OpenMP/atomic_read_codegen.c @@ -289,11 +289,11 @@ #pragma omp atomic read ldv = bfx3_packed.a; // CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic -// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]] -// CHECK: [[LD:%.+]] = load i64, i64* [[LDTEMP]] -// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47 -// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 63 -// CHECK: trunc i64 [[ASHR]] to i32 +// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]], align +// CHECK: [[LDTEMP_CAST:%.+]] = bitcast i64* [[LDTEMP]] to i32* +// CHECK: [[LD:%.+]] = load i32, i32* [[LDTEMP_CAST]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 15 +// CHECK: [[ASHR:%.+]] = ashr i32 [[SHL]], 31 // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx4.a; Index: clang/test/OpenMP/atomic_update_codegen.cpp =================================================================== --- clang/test/OpenMP/atomic_update_codegen.cpp +++ clang/test/OpenMP/atomic_update_codegen.cpp @@ -758,20 +758,20 @@ // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP1:%.+]], // CHECK: store i64 [[OLD_BF_VALUE]], i64* [[TEMP:%.+]], -// CHECK: [[A_LD:%.+]] = load 
i64, i64* [[TEMP]], -// CHECK: [[A_SHL:%.+]] = shl i64 [[A_LD]], 47 -// CHECK: [[A_ASHR:%.+]] = ashr i64 [[A_SHL:%.+]], 63 -// CHECK: [[A_CAST:%.+]] = trunc i64 [[A_ASHR:%.+]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST:%.+]] to x86_fp80 +// CHECK: [[TEMP_CAST:%.+]] = bitcast i64* [[TEMP]] to i32* +// CHECK: [[A_LD:%.+]] = load i32, i32* [[TEMP_CAST]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 15 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 31 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[MUL]] to i32 -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_LD:%.+]] = load i64, i64* [[TEMP1]], -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 [[BF_LD]], -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[TEMP1]] +// CHECK: [[TEMP1_I32:%.+]] = bitcast i64* [[TEMP1]] to i32 +// CHECK: [[BF_LD:%.+]] = load i32, i32* [[TEMP1_I32]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[TEMP1_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 Index: clang/test/OpenMP/atomic_write_codegen.c =================================================================== --- clang/test/OpenMP/atomic_write_codegen.c +++ clang/test/OpenMP/atomic_write_codegen.c @@ -416,12 +416,12 @@ // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ] -// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 -// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 -// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -65537 -// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]] +// CHECK: [[LDTEMP_I32:%.+]] = bitcast i64* [[LDTEMP:%.+]] to i32* +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -65537 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP_I32]] // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[LDTEMP]] // CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0