Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -162,6 +162,23 @@ OS << ")\n"; } +// Dynamically round a pointer up to a multiple of the given alignment. +static llvm::Value* emitRoundPointerUpToAlignment(CodeGenFunction &CGF, + llvm::Value *Ptr, + CharUnits Align) { + llvm::Value *PtrAsInt = Ptr; + // Ptr = (Ptr + Align - 1) & -Align; assumes Align is a power of two. + PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); + PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, + llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); + PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, + llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); + PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, + Ptr->getType(), + Ptr->getName() + ".aligned"); + return PtrAsInt; +} + /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// @@ -192,19 +209,11 @@ // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); - if (AllowHigherAlign && DirectAlign > SlotSize) { - llvm::Value *PtrAsInt = Ptr; - PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); - PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, DirectAlign.getQuantity() - 1)); - PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, -DirectAlign.getQuantity())); - Addr = Address(CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), - "argp.cur.aligned"), - DirectAlign); - } else { - Addr = Address(Ptr, SlotSize); - } + if (AllowHigherAlign && DirectAlign > SlotSize) + Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), + DirectAlign); + else + Addr = Address(Ptr, SlotSize); // Advance the pointer past the argument, then store that back. 
CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize); @@ -3072,20 +3081,10 @@ // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. - uint64_t Align = CGF.getContext().getTypeAlignInChars(Ty).getQuantity(); - if (Align > 8) { - // overflow_arg_area = (overflow_arg_area + align - 1) & -align; - llvm::Value *Offset = - llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); - overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset); - llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(overflow_arg_area, - CGF.Int64Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, -(uint64_t)Align); - overflow_arg_area = - CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), - overflow_arg_area->getType(), - "overflow_arg_area.align"); - } + CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); + if (Align > CharUnits::fromQuantity(8)) + overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, + Align); // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); @@ -3106,7 +3105,7 @@ CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. - return Address(Res, CharUnits::fromQuantity(Align)); + return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -3541,11 +3540,16 @@ Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4)); - Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr), + Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); - - // The current address is the address of the varargs element. - // FIXME: do we not need to round up to alignment? 
+ // Round up address of argument to alignment + CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); + if (Align > OverflowAreaAlign) { + llvm::Value *Ptr = OverflowArea.getPointer(); + OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), + Align); + } + MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. Index: test/CodeGen/ppc-varargs-struct.c =================================================================== --- test/CodeGen/ppc-varargs-struct.c +++ test/CodeGen/ppc-varargs-struct.c @@ -39,9 +39,13 @@ // CHECK-PPC:[[USING_OVERFLOW]] // CHECK-PPC-NEXT: [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3 // CHECK-PPC-NEXT: [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4 -// CHECK-PPC-NEXT: [[MEMADDR:%.+]] = bitcast i8* [[OVERFLOW_AREA]] to %struct.x** -// CHECK-PPC-NEXT: [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* [[OVERFLOW_AREA]], i32 4 -// CHECK-PPC-NEXT: store i8* [[NEW_OVERFLOW_AREA]], i8** [[OVERFLOW_AREA_P]] +// CHECK-PPC-NEXT: %{{[0-9]+}} = ptrtoint i8* %argp.cur to i32 +// CHECK-PPC-NEXT: %{{[0-9]+}} = add i32 %{{[0-9]+}}, 7 +// CHECK-PPC-NEXT: %{{[0-9]+}} = and i32 %{{[0-9]+}}, -8 +// CHECK-PPC-NEXT: %argp.cur.aligned = inttoptr i32 %{{[0-9]+}} to i8* +// CHECK-PPC-NEXT: [[MEMADDR:%.+]] = bitcast i8* %argp.cur.aligned to %struct.x** +// CHECK-PPC-NEXT: [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 4 +// CHECK-PPC-NEXT: store i8* [[NEW_OVERFLOW_AREA]], i8** [[OVERFLOW_AREA_P]], align 4 // CHECK-PPC-NEXT: br label %[[CONT]] // // CHECK-PPC:[[CONT]] Index: test/CodeGen/x86_64-arguments.c =================================================================== --- test/CodeGen/x86_64-arguments.c +++ test/CodeGen/x86_64-arguments.c @@ -336,7 +336,8 @@ // CHECK-LABEL: define i32 @f44 // CHECK: ptrtoint -// CHECK-NEXT: and {{.*}}, -32 
+// CHECK-NEXT: add i64 %{{[0-9]+}}, 31 +// CHECK-NEXT: and i64 %{{[0-9]+}}, -32 // CHECK-NEXT: inttoptr typedef int T44 __attribute((vector_size(32))); struct s44 { T44 x; int y; };