Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -912,20 +912,21 @@ /// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as -/// a pointer to an object of type \arg Ty. +/// a pointer to an object of type \arg Ty, known to be aligned to +/// \arg SrcAlign bytes. /// /// This safely handles the case when the src type is smaller than the /// destination type; in this situation the values of bits which not /// present in the src are undefined. static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr, - llvm::Type *Ty, + llvm::Type *Ty, CharUnits SrcAlign, CodeGenFunction &CGF) { llvm::Type *SrcTy = cast(SrcPtr->getType())->getElementType(); // If SrcTy and Ty are the same, just do a load. if (SrcTy == Ty) - return CGF.Builder.CreateLoad(SrcPtr); + return CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity()); uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty); @@ -940,7 +941,8 @@ // extension or truncation to the desired type. if ((isa(Ty) || isa(Ty)) && (isa(SrcTy) || isa(SrcTy))) { - llvm::LoadInst *Load = CGF.Builder.CreateLoad(SrcPtr); + llvm::LoadInst *Load = + CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity()); return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF); } @@ -954,23 +956,20 @@ // to that information. llvm::Value *Casted = CGF.Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(Ty)); - llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted); - // FIXME: Use better alignment / avoid requiring aligned load. - Load->setAlignment(1); - return Load; + return CGF.Builder.CreateAlignedLoad(Casted, SrcAlign.getQuantity()); } // Otherwise do coercion through memory. This is stupid, but // simple. - llvm::Value *Tmp = CGF.CreateTempAlloca(Ty); + llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(Ty); + Tmp->setAlignment(SrcAlign.getQuantity()); llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy(); llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy); llvm::Value *SrcCasted = CGF.Builder.CreateBitCast(SrcPtr, I8PtrTy); - // FIXME: Use better alignment. CGF.Builder.CreateMemCpy(Casted, SrcCasted, llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), - 1, false); - return CGF.Builder.CreateLoad(Tmp); + SrcAlign.getQuantity(), false); + return CGF.Builder.CreateAlignedLoad(Tmp, SrcAlign.getQuantity()); } // Function to store a first-class aggregate into memory. We prefer to @@ -979,39 +978,45 @@ // FIXME: Do we need to recurse here? static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val, llvm::Value *DestPtr, bool DestIsVolatile, - bool LowAlignment) { + CharUnits DestAlign) { // Prefer scalar stores to first-class aggregate stores. if (llvm::StructType *STy = dyn_cast(Val->getType())) { + const llvm::StructLayout *Layout = + CGF.CGM.getDataLayout().getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(STy, DestPtr, 0, i); llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i); - llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr, - DestIsVolatile); - if (LowAlignment) - SI->setAlignment(1); + uint64_t EltOffset = Layout->getElementOffset(i); + CharUnits EltAlign = + DestAlign.alignmentAtOffset(CharUnits::fromQuantity(EltOffset)); + CGF.Builder.CreateAlignedStore(Elt, EltPtr, EltAlign.getQuantity(), + DestIsVolatile); } } else { - llvm::StoreInst *SI = CGF.Builder.CreateStore(Val, DestPtr, DestIsVolatile); - if (LowAlignment) - SI->setAlignment(1); + CGF.Builder.CreateAlignedStore(Val, DestPtr, DestAlign.getQuantity(), + DestIsVolatile); } } /// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src, -/// where the source and destination may have different types. +/// where the source and destination may have different types. The +/// destination is known to be aligned to \arg DstAlign bytes. /// /// This safely handles the case when the src type is larger than the /// destination type; the upper bits of the src will be lost. static void CreateCoercedStore(llvm::Value *Src, llvm::Value *DstPtr, bool DstIsVolatile, + CharUnits DstAlign, CodeGenFunction &CGF) { llvm::Type *SrcTy = Src->getType(); llvm::Type *DstTy = cast(DstPtr->getType())->getElementType(); if (SrcTy == DstTy) { - CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile); + CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(), + DstIsVolatile); return; } @@ -1027,7 +1032,8 @@ if ((isa(SrcTy) || isa(SrcTy)) && (isa(DstTy) || isa(DstTy))) { Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF); - CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile); + CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(), + DstIsVolatile); return; } @@ -1037,8 +1043,7 @@ if (SrcSize <= DstSize) { llvm::Value *Casted = CGF.Builder.CreateBitCast(DstPtr, llvm::PointerType::getUnqual(SrcTy)); - // FIXME: Use better alignment / avoid requiring aligned store. - BuildAggStore(CGF, Src, Casted, DstIsVolatile, true); + BuildAggStore(CGF, Src, Casted, DstIsVolatile, DstAlign); } else { // Otherwise do coercion through memory. This is stupid, but // simple. @@ -1049,15 +1054,15 @@ // // FIXME: Assert that we aren't truncating non-padding bits when have access // to that information. - llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy); - CGF.Builder.CreateStore(Src, Tmp); + llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(SrcTy); + Tmp->setAlignment(DstAlign.getQuantity()); + CGF.Builder.CreateAlignedStore(Src, Tmp, DstAlign.getQuantity()); llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy(); llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy); llvm::Value *DstCasted = CGF.Builder.CreateBitCast(DstPtr, I8PtrTy); - // FIXME: Use better alignment. CGF.Builder.CreateMemCpy(DstCasted, Casted, llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), - 1, false); + DstAlign.getQuantity(), false); } } @@ -1996,6 +2001,7 @@ Alloca->setAlignment(AlignmentToUse); llvm::Value *V = Alloca; llvm::Value *Ptr = V; // Pointer to store into. + CharUnits PtrAlign = CharUnits::fromQuantity(AlignmentToUse); // If the value is offset in memory, apply the offset now. if (unsigned Offs = ArgI.getDirectOffset()) { @@ -2003,6 +2009,7 @@ Ptr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), Ptr, Offs); Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ArgI.getCoerceToType())); + PtrAlign = PtrAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs)); } // Fast-isel and the optimizer generally like scalar values better than @@ -2047,7 +2054,7 @@ assert(NumIRArgs == 1); auto AI = FnArgs[FirstIRArg]; AI->setName(Arg->getName() + ".coerce"); - CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this); + CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, PtrAlign, *this); } @@ -2415,15 +2422,17 @@ } } else { llvm::Value *V = ReturnValue; + CharUnits Align = getContext().getTypeAlignInChars(RetTy); // If the value is offset in memory, apply the offset now. if (unsigned Offs = RetAI.getDirectOffset()) { V = Builder.CreateBitCast(V, Builder.getInt8PtrTy()); V = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), V, Offs); V = Builder.CreateBitCast(V, llvm::PointerType::getUnqual(RetAI.getCoerceToType())); + Align = Align.alignmentAtOffset(CharUnits::fromQuantity(Offs)); } - RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this); + RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), Align, *this); } // In ARC, end functions that return a retainable type with a call @@ -3286,12 +3295,17 @@ // FIXME: Avoid the conversion through memory if possible. llvm::Value *SrcPtr; + CharUnits SrcAlign; if (RV.isScalar() || RV.isComplex()) { SrcPtr = CreateMemTemp(I->Ty, "coerce"); + SrcAlign = TypeAlign; LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign); EmitInitStoreOfNonAggregate(*this, RV, SrcLV); - } else + } else { SrcPtr = RV.getAggregateAddr(); + // This alignment is guaranteed by EmitCallArg. + SrcAlign = TypeAlign; + } // If the value is offset in memory, apply the offset now. if (unsigned Offs = ArgInfo.getDirectOffset()) { @@ -3299,7 +3313,7 @@ SrcPtr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), SrcPtr, Offs); SrcPtr = Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(ArgInfo.getCoerceToType())); - + SrcAlign = SrcAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs)); } // Fast-isel and the optimizer generally like scalar values better than @@ -3338,7 +3352,8 @@ // In the simple case, just pass the coerced loaded value. assert(NumIRArgs == 1); IRCallArgs[FirstIRArg] = - CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this); + CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), + SrcAlign, *this); } break; @@ -3535,12 +3550,13 @@ case TEK_Aggregate: { llvm::Value *DestPtr = ReturnValue.getValue(); bool DestIsVolatile = ReturnValue.isVolatile(); + CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy); if (!DestPtr) { DestPtr = CreateMemTemp(RetTy, "agg.tmp"); DestIsVolatile = false; } - BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false); + BuildAggStore(*this, CI, DestPtr, DestIsVolatile, DestAlign); return RValue::getAggregate(DestPtr); } case TEK_Scalar: { @@ -3557,6 +3573,7 @@ llvm::Value *DestPtr = ReturnValue.getValue(); bool DestIsVolatile = ReturnValue.isVolatile(); + CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy); if (!DestPtr) { DestPtr = CreateMemTemp(RetTy, "coerce"); @@ -3565,14 +3582,17 @@ // If the value is offset in memory, apply the offset now. llvm::Value *StorePtr = DestPtr; + CharUnits StoreAlign = DestAlign; if (unsigned Offs = RetAI.getDirectOffset()) { StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy()); StorePtr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), StorePtr, Offs); StorePtr = Builder.CreateBitCast(StorePtr, llvm::PointerType::getUnqual(RetAI.getCoerceToType())); + StoreAlign = + StoreAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs)); } - CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + CreateCoercedStore(CI, StorePtr, DestIsVolatile, StoreAlign, *this); return convertTempToRValue(DestPtr, RetTy, SourceLocation()); } Index: test/CodeGen/align-systemz.c =================================================================== --- test/CodeGen/align-systemz.c +++ test/CodeGen/align-systemz.c @@ -25,3 +25,19 @@ s = es; } + +// Alignment should be respected for coerced argument loads + +struct arg { long y __attribute__((packed, aligned(4))); }; + +extern struct arg x; +void f(struct arg); + +void test (void) +{ + f(x); +} + +// CHECK-LABEL: @test +// CHECK: load i64, i64* getelementptr inbounds (%struct.arg, %struct.arg* @x, i32 0, i32 0), align 4 + Index: test/CodeGen/arm64-abi-vector.c =================================================================== --- test/CodeGen/arm64-abi-vector.c +++ test/CodeGen/arm64-abi-vector.c @@ -309,7 +309,7 @@ // CHECK: args_vec_5c // CHECK: [[C5:%.*]] = alloca <5 x i8>, align 8 // CHECK: [[TMP:%.*]] = bitcast <5 x i8>* [[C5]] to <2 x i32>* -// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1 +// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8 double sum = fixed; sum = sum + c5.x + c5.y; return sum; @@ -325,7 +325,7 @@ // CHECK: args_vec_9c // CHECK: [[C9:%.*]] = alloca <9 x i8>, align 16 // CHECK: [[TMP:%.*]] = bitcast <9 x i8>* [[C9]] to <4 x i32>* -// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1 +// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16 double sum = fixed; sum = sum + c9.x + c9.y; return sum; @@ -355,7 +355,7 @@ // CHECK: args_vec_3s // CHECK: [[C3:%.*]] = alloca <3 x i16>, align 8 // CHECK: [[TMP:%.*]] = bitcast <3 x i16>* [[C3]] to <2 x i32>* -// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1 +// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8 double sum = fixed; sum = sum + c3.x + c3.y; return sum; @@ -371,7 +371,7 @@ // CHECK: args_vec_5s // CHECK: [[C5:%.*]] = alloca <5 x i16>, align 16 // CHECK: [[TMP:%.*]] = bitcast <5 x i16>* [[C5]] to <4 x i32>* -// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1 +// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16 double sum = fixed; sum = sum + c5.x + c5.y; return sum; @@ -387,7 +387,7 @@ // CHECK: args_vec_3i // CHECK: [[C3:%.*]] = alloca <3 x i32>, align 16 // CHECK: [[TMP:%.*]] = bitcast <3 x i32>* [[C3]] to <4 x i32>* -// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1 +// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16 double sum = fixed; sum = sum + c3.x + c3.y; return sum; Index: test/CodeGen/arm64-arguments.c =================================================================== --- test/CodeGen/arm64-arguments.c +++ test/CodeGen/arm64-arguments.c @@ -219,8 +219,8 @@ // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce) // CHECK: %s1 = alloca %struct.s36, align 16 // CHECK: %s2 = alloca %struct.s36, align 16 -// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 -// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 +// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 +// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>* // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>* @@ -275,8 +275,8 @@ // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) // CHECK: %s1 = alloca %struct.s38, align 8 // CHECK: %s2 = alloca %struct.s38, align 8 -// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1 -// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1 +// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8 +// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1 @@ -287,8 +287,8 @@ s38_no_align g38_2; int caller38() { // CHECK: define i32 @caller38() -// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1 -// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1 +// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 +// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]]) return f38(3, g38, g38_2); } @@ -299,8 +299,8 @@ // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) // CHECK: %s1 = alloca %struct.s38, align 8 // CHECK: %s2 = alloca %struct.s38, align 8 -// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1 -// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1 +// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8 +// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1 @@ -309,8 +309,8 @@ } int caller38_stack() { // CHECK: define i32 @caller38_stack() -// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1 -// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1 +// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4 +// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]]) return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2); } @@ -328,8 +328,8 @@ // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) // CHECK: %s1 = alloca %struct.s39, align 16 // CHECK: %s2 = alloca %struct.s39, align 16 -// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 -// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 +// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 +// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1 @@ -340,8 +340,8 @@ s39_with_align g39_2; int caller39() { // CHECK: define i32 @caller39() -// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1 -// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1 +// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 +// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]]) return f39(3, g39, g39_2); } @@ -352,8 +352,8 @@ // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) // CHECK: %s1 = alloca %struct.s39, align 16 // CHECK: %s2 = alloca %struct.s39, align 16 -// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 -// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 +// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 +// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1 @@ -362,8 +362,8 @@ } int caller39_stack() { // CHECK: define i32 @caller39_stack() -// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1 -// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1 +// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16 +// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2); } @@ -383,8 +383,8 @@ // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) // CHECK: %s1 = alloca %struct.s40, align 8 // CHECK: %s2 = alloca %struct.s40, align 8 -// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1 -// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1 +// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8 +// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1 @@ -395,8 +395,8 @@ s40_no_align g40_2; int caller40() { // CHECK: define i32 @caller40() -// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1 -// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1 +// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 +// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]]) return f40(3, g40, g40_2); } @@ -407,8 +407,8 @@ // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) // CHECK: %s1 = alloca %struct.s40, align 8 // CHECK: %s2 = alloca %struct.s40, align 8 -// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1 -// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1 +// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8 +// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1 @@ -417,8 +417,8 @@ } int caller40_stack() { // CHECK: define i32 @caller40_stack() -// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1 -// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1 +// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 +// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]]) return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2); } @@ -438,8 +438,8 @@ // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) // CHECK: %s1 = alloca %struct.s41, align 16 // CHECK: %s2 = alloca %struct.s41, align 16 -// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 -// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 +// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 +// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1 @@ -450,8 +450,8 @@ s41_with_align g41_2; int caller41() { // CHECK: define i32 @caller41() -// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1 -// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1 +// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 +// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]]) return f41(3, g41, g41_2); } @@ -462,8 +462,8 @@ // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) // CHECK: %s1 = alloca %struct.s41, align 16 // CHECK: %s2 = alloca %struct.s41, align 16 -// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 -// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 +// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 +// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1 @@ -472,8 +472,8 @@ } int caller41_stack() { // CHECK: define i32 @caller41_stack() -// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1 -// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1 +// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16 +// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2); } Index: test/CodeGen/arm64-be-bitfield.c =================================================================== --- test/CodeGen/arm64-be-bitfield.c +++ test/CodeGen/arm64-be-bitfield.c @@ -7,7 +7,7 @@ // Get the high 32-bits and then shift appropriately for big-endian. signed callee_b0f(struct bt3 bp11) { // IR: callee_b0f(i64 [[ARG:%.*]]) -// IR: store i64 [[ARG]], i64* [[PTR:%.*]] +// IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8 // IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8* // IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4 // ARM: asr x0, x0, #54 Index: test/CodeGen/ppc64-struct-onefloat.c =================================================================== --- test/CodeGen/ppc64-struct-onefloat.c +++ test/CodeGen/ppc64-struct-onefloat.c @@ -14,15 +14,15 @@ // CHECK: %d = alloca %struct.s4, align 4 // CHECK: %e = alloca %struct.s5, align 8 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %a, i32 0, i32 0 -// CHECK: store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %b, i32 0, i32 0 -// CHECK: store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %d, i32 0, i32 0 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0 -// CHECK: store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %e, i32 0, i32 0 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0 -// CHECK: store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8 // CHECK: ret void void foo(void) @@ -36,14 +36,14 @@ // CHECK-LABEL: define void @foo // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %p1, i32 0, i32 0 -// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %p2, i32 0, i32 0 -// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %p4, i32 0, i32 0 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0 -// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %p5, i32 0, i32 0 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0 -// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1 +// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8 // CHECK: call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}) // CHECK: ret void Index: test/CodeGen/ppc64le-aggregates.c =================================================================== --- test/CodeGen/ppc64le-aggregates.c +++ test/CodeGen/ppc64le-aggregates.c @@ -54,49 +54,49 @@ struct f2a2b func_f2a2b(struct f2a2b x) { return x; } // CHECK-LABEL: @call_f1 -// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 4 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]]) struct f1 global_f1; void call_f1(void) { global_f1 = func_f1(global_f1); } // CHECK-LABEL: @call_f2 -// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 4 // CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]]) struct f2 global_f2; void call_f2(void) { global_f2 = func_f2(global_f2); } // CHECK-LABEL: @call_f3 -// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 4 // CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]]) struct f3 global_f3; void call_f3(void) { global_f3 = func_f3(global_f3); } // CHECK-LABEL: @call_f4 -// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 4 // CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]]) struct f4 global_f4; void call_f4(void) { global_f4 = func_f4(global_f4); } // CHECK-LABEL: @call_f5 -// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 4 // CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]]) struct f5 global_f5; void call_f5(void) { global_f5 = func_f5(global_f5); } // CHECK-LABEL: @call_f6 -// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 4 // CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]]) struct f6 global_f6; void call_f6(void) { global_f6 = func_f6(global_f6); } // CHECK-LABEL: @call_f7 -// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 4 // CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]]) struct f7 global_f7; void call_f7(void) { global_f7 = func_f7(global_f7); } // CHECK-LABEL: @call_f8 -// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 1 +// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 4 // CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]]) struct f8 global_f8; void call_f8(void) { global_f8 = func_f8(global_f8); } @@ -104,7 +104,7 @@ // CHECK-LABEL: @call_f9 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64] // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]] // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]]) struct f9 global_f9; Index: test/CodeGenCXX/2012-03-16-StoreAlign.cpp =================================================================== --- test/CodeGenCXX/2012-03-16-StoreAlign.cpp +++ test/CodeGenCXX/2012-03-16-StoreAlign.cpp @@ -28,7 +28,7 @@ }; // CHECK: @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth = linkonce_odr global %struct.Length zeroinitializer, align 4 -// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 1 +// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 4 bool bar(Length &b) { Foo f; Index: test/CodeGenCXX/varargs.cpp =================================================================== --- test/CodeGenCXX/varargs.cpp +++ test/CodeGenCXX/varargs.cpp @@ -37,7 +37,7 @@ // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[X]] to i8* // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 8, i32 4, i1 false) // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i64* - // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 1 + // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 4 // CHECK-NEXT: call void (...) @_ZN5test13fooEz(i64 [[T1]]) // CHECK-NEXT: ret void }