Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp =================================================================== --- cfe/trunk/lib/CodeGen/TargetInfo.cpp +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp @@ -162,6 +162,23 @@ OS << ")\n"; } +// Emit IR rounding Ptr up to a multiple of Align (assumed a power of two). +static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, + llvm::Value *Ptr, + CharUnits Align) { + llvm::Value *PtrAsInt = Ptr; + // Ptr = (Ptr + Align - 1) & -Align; + PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); + PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, + llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); + PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, + llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); + PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, + Ptr->getType(), + Ptr->getName() + ".aligned"); + return PtrAsInt; +} + /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// @@ -193,17 +210,10 @@ // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); if (AllowHigherAlign && DirectAlign > SlotSize) { - llvm::Value *PtrAsInt = Ptr; - PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); - PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, DirectAlign.getQuantity() - 1)); - PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, -DirectAlign.getQuantity())); - Addr = Address(CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), - "argp.cur.aligned"), - DirectAlign); + Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), + DirectAlign); } else { - Addr = Address(Ptr, SlotSize); + Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. 
@@ -3072,19 +3082,10 @@ // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. - uint64_t Align = CGF.getContext().getTypeAlignInChars(Ty).getQuantity(); - if (Align > 8) { - // overflow_arg_area = (overflow_arg_area + align - 1) & -align; - llvm::Value *Offset = - llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); - overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset); - llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(overflow_arg_area, - CGF.Int64Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, -(uint64_t)Align); - overflow_arg_area = - CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), - overflow_arg_area->getType(), - "overflow_arg_area.align"); + CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); + if (Align > CharUnits::fromQuantity(8)) { + overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, + Align); } // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. @@ -3106,7 +3107,7 @@ CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. - return Address(Res, CharUnits::fromQuantity(Align)); + return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -3541,11 +3542,16 @@ Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4)); - Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr), + Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); - - // The current address is the address of the varargs element. - // FIXME: do we not need to round up to alignment? 
+ // Round up the argument address to the alignment of Ty if needed. + CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); + if (Align > OverflowAreaAlign) { + llvm::Value *Ptr = OverflowArea.getPointer(); + OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), + Align); + } + MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. Index: cfe/trunk/test/CodeGen/ppc-varargs-struct.c =================================================================== --- cfe/trunk/test/CodeGen/ppc-varargs-struct.c +++ cfe/trunk/test/CodeGen/ppc-varargs-struct.c @@ -39,9 +39,13 @@ // CHECK-PPC:[[USING_OVERFLOW]] // CHECK-PPC-NEXT: [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3 // CHECK-PPC-NEXT: [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4 -// CHECK-PPC-NEXT: [[MEMADDR:%.+]] = bitcast i8* [[OVERFLOW_AREA]] to %struct.x** -// CHECK-PPC-NEXT: [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* [[OVERFLOW_AREA]], i32 4 -// CHECK-PPC-NEXT: store i8* [[NEW_OVERFLOW_AREA]], i8** [[OVERFLOW_AREA_P]] +// CHECK-PPC-NEXT: [[OVERFLOW_AREA_INT:%[0-9]+]] = ptrtoint i8* [[OVERFLOW_AREA]] to i32 +// CHECK-PPC-NEXT: [[OVERFLOW_AREA_ADD:%[0-9]+]] = add i32 [[OVERFLOW_AREA_INT]], 7 +// CHECK-PPC-NEXT: [[OVERFLOW_AREA_MASKED:%[0-9]+]] = and i32 [[OVERFLOW_AREA_ADD]], -8 +// CHECK-PPC-NEXT: [[OVERFLOW_AREA_ALIGNED:%.+]] = inttoptr i32 [[OVERFLOW_AREA_MASKED]] to i8* +// CHECK-PPC-NEXT: [[MEMADDR:%.+]] = bitcast i8* [[OVERFLOW_AREA_ALIGNED]] to %struct.x** +// CHECK-PPC-NEXT: [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* [[OVERFLOW_AREA_ALIGNED]], i32 4 +// CHECK-PPC-NEXT: store i8* [[NEW_OVERFLOW_AREA]], i8** [[OVERFLOW_AREA_P]], align 4 // CHECK-PPC-NEXT: br label %[[CONT]] // // CHECK-PPC:[[CONT]] Index: cfe/trunk/test/CodeGen/x86_64-arguments.c =================================================================== --- cfe/trunk/test/CodeGen/x86_64-arguments.c +++ cfe/trunk/test/CodeGen/x86_64-arguments.c @@ -336,7 +336,8 @@ // CHECK-LABEL: define 
i32 @f44 // CHECK: ptrtoint -// CHECK-NEXT: and {{.*}}, -32 +// CHECK-NEXT: add i64 %{{[0-9]+}}, 31 +// CHECK-NEXT: and i64 %{{[0-9]+}}, -32 // CHECK-NEXT: inttoptr typedef int T44 __attribute((vector_size(32))); struct s44 { T44 x; int y; };