Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -3296,18 +3296,26 @@
 
 Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
   QualType Ty = VE->getType();
+
   if (Ty->isVariablyModifiedType())
     CGF.EmitVariablyModifiedType(Ty);
 
   llvm::Value *ArgValue = CGF.EmitVAListRef(VE->getSubExpr());
   llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType());
+  llvm::Type *ArgTy = ConvertType(VE->getType());
 
   // If EmitVAArg fails, we fall back to the LLVM instruction.
   if (!ArgPtr)
-    return Builder.CreateVAArg(ArgValue, ConvertType(VE->getType()));
+    return Builder.CreateVAArg(ArgValue, ArgTy);
 
   // FIXME Volatility.
-  return Builder.CreateLoad(ArgPtr);
+  llvm::Value *Val = Builder.CreateLoad(ArgPtr);
+
+  // If EmitVAArg promoted the type, we must truncate it.
+  if (ArgTy != Val->getType())
+    Val = Builder.CreateTrunc(Val, ArgTy);
+
+  return Val;
 }
 
 Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -5919,6 +5919,14 @@
                                     CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
   llvm::Type *BPP = CGF.Int8PtrPtrTy;
+
+  // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
+  unsigned SlotSizeInBits = IsO32 ? 32 : 64;
+  if (Ty->isIntegerType() &&
+      CGF.getContext().getIntWidth(Ty) < SlotSizeInBits) {
+    Ty = CGF.getContext().getIntTypeForBitwidth(SlotSizeInBits,
+                                                Ty->isSignedIntegerType());
+  }
 
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
@@ -5943,8 +5951,8 @@
 
   llvm::Value *AlignedAddr = Builder.CreateBitCast(AddrTyped, BP);
   TypeAlign = std::max((unsigned)TypeAlign, MinABIStackAlignInBytes);
-  uint64_t Offset =
-    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, TypeAlign);
+  unsigned ArgSizeInBits = CGF.getContext().getTypeSize(Ty);
+  uint64_t Offset = llvm::RoundUpToAlignment(ArgSizeInBits / 8, TypeAlign);
   llvm::Value *NextAddr =
     Builder.CreateGEP(AlignedAddr, llvm::ConstantInt::get(IntTy, Offset),
                       "ap.next");
Index: test/CodeGen/mips-varargs.c
===================================================================
--- test/CodeGen/mips-varargs.c
+++ test/CodeGen/mips-varargs.c
@@ -1,22 +1,25 @@
 // RUN: %clang_cc1 -triple mips-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+// RUN: %clang_cc1 -triple mipsel-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -O1 -emit-llvm -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
 
 #include <stdarg.h>
 
 typedef int v4i32 __attribute__ ((__vector_size__ (16)));
 
-int test_v4i32(char *fmt, ...) {
+int test_i32(char *fmt, ...) {
   va_list va;
 
   va_start(va, fmt);
-  v4i32 v = va_arg(va, v4i32);
+  int v = va_arg(va, int);
   va_end(va);
 
-  return v[0];
+  return v;
 }
 
-// ALL: define i32 @test_v4i32(i8*{{.*}} %fmt, ...)
+// ALL-LABEL: define i32 @test_i32(i8*{{.*}} %fmt, ...)
 //
 // O32: %va = alloca i8*, align [[PTRALIGN:4]]
 // N32: %va = alloca i8*, align [[PTRALIGN:4]]
@@ -24,26 +27,155 @@
 //
 // ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[TMP0:%.+]] = bitcast i8** %va to i32**
+// O32: [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// O32: [[TMP1:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP1]] to i8*
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
+// NEW: [[TMP2:%.+]] = load i64* [[AP_CUR]], align 8
+// NEW: [[ARG1:%.+]] = trunc i64 [[TMP2]] to i32
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: ret i32 [[ARG1]]
+// ALL: }
+
+int test_i32_2args(char *fmt, ...) {
+  va_list va;
+
+  va_start(va, fmt);
+  int v1 = va_arg(va, int);
+  int v2 = va_arg(va, int);
+  va_end(va);
+
+  return v1 + v2;
+}
+
+// ALL-LABEL: define i32 @test_i32_2args(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[TMP0:%.+]] = bitcast i8** %va to i32**
+// O32: [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// O32: [[TMP1:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP1]] to i8*
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i32]] 1
+// FIXME: N32 optimised this bitcast out. Why only for this ABI?
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i64]] 1
+// N64: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// O32: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+// FIXME: N32 optimised this store out. Why only for this ABI?
+// N64: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
+// NEW: [[TMP3:%.+]] = load i64* [[AP_CUR]], align 8
+// NEW: [[ARG1:%.+]] = trunc i64 [[TMP3]] to i32
+//
+// O32: [[TMP2:%.+]] = getelementptr i32* [[AP_CUR]], i32 2
+// O32: [[AP_NEXT:%.+]] = bitcast i32* [[TMP2]] to i8*
+// NEW: [[TMP2:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T]] 2
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP2]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG2:%.+]] = load i32* [[TMP1]], align 4
+// NEW: [[TMP4:%.+]] = load i64* [[TMP1]], align 8
+// NEW: [[ARG2:%.+]] = trunc i64 [[TMP4]] to i32
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: [[ADD:%.+]] = add nsw i32 [[ARG1]], [[ARG2]]
+// ALL: ret i32 [[ADD]]
+// ALL: }
+
+long long test_i64(char *fmt, ...) {
+  va_list va;
+
+  va_start(va, fmt);
+  long long v = va_arg(va, long long);
+  va_end(va);
+
+  return v;
+}
+
+// ALL-LABEL: define i64 @test_i64(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
+//
+// O32: [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
+// NEW: [[TMP0:%.+]] = bitcast i8** %va to i64**
+// NEW: [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+//
+// i64 is 8-byte aligned; while this is within O32's stack alignment, there's
+// no guarantee that the offset is still 8-byte aligned after earlier reads.
+// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// O32: [[PTR1:%.+]] = add i32 [[PTR0]], 7
+// O32: [[PTR2:%.+]] = and i32 [[PTR1]], -8
+// O32: [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i64*
+// O32: [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
+//
+// O32: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 8
+// N32: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], i32 1
+// N64: [[TMP1:%.+]] = getelementptr i64* [[AP_CUR]], i64 1
+// NEW: [[AP_NEXT:%.+]] = bitcast i64* [[TMP1]] to i8*
+//
+// ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32: [[ARG1:%.+]] = load i64* [[PTR3]], align 8
+// NEW: [[ARG1:%.+]] = load i64* [[AP_CUR]], align 8
+//
+// ALL: call void @llvm.va_end(i8* [[VA1]])
+// ALL: ret i64 [[ARG1]]
+// ALL: }
+
+int test_v4i32(char *fmt, ...) {
+  va_list va;
+
+  va_start(va, fmt);
+  v4i32 v = va_arg(va, v4i32);
+  va_end(va);
+
+  return v[0];
+}
+
+// ALL-LABEL: define i32 @test_v4i32(i8*{{.*}} %fmt, ...)
+//
+// ALL: %va = alloca i8*, align [[PTRALIGN]]
+// ALL: [[VA1:%.+]] = bitcast i8** %va to i8*
+// ALL: call void @llvm.va_start(i8* [[VA1]])
 // ALL: [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
 //
-// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i32]]
-// N32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i32]]
-// N64: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[PTRTYPE:i64]]
+// O32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// N32: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
+// N64: [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i64]]
 //
 // Vectors are 16-byte aligned, however the O32 ABI has a maximum alignment of
 // 8-bytes since the base of the stack is 8-byte aligned.
 // O32: [[PTR1:%.+]] = add i32 [[PTR0]], 7
 // O32: [[PTR2:%.+]] = and i32 [[PTR1]], -8
 //
-// N32: [[PTR1:%.+]] = add i32 [[PTR0]], 15
-// N32: [[PTR2:%.+]] = and i32 [[PTR1]], -16
-//
-// N64: [[PTR1:%.+]] = add i64 [[PTR0]], 15
-// N64: [[PTR2:%.+]] = and i64 [[PTR1]], -16
+// NEW: [[PTR1:%.+]] = add [[INTPTR_T]] [[PTR0]], 15
+// NEW: [[PTR2:%.+]] = and [[INTPTR_T]] [[PTR1]], -16
 //
-// ALL: [[PTR3:%.+]] = inttoptr [[PTRTYPE]] [[PTR2]] to <4 x i32>*
-// ALL: [[PTR4:%.+]] = inttoptr [[PTRTYPE]] [[PTR2]] to i8*
-// ALL: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[PTRTYPE]] 16
+// ALL: [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to <4 x i32>*
+// ALL: [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
+// ALL: [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 16
 // ALL: store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
 // ALL: [[PTR5:%.+]] = load <4 x i32>* [[PTR3]], align 16
 // ALL: call void @llvm.va_end(i8* [[VA1]])
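
For reference, a minimal C sketch (not part of the patch; the function name and parameter are illustrative only) of what the promoted va_arg lowering amounts to on the NEW ABIs: the expanded code reads the full 64-bit argument slot and then truncates to the declared type, mirroring the CreateTrunc added to VisitVAArgExpr above.

/* Hypothetical analogue of the N32/N64 lowering checked in the tests. */
#include <stdint.h>

static int read_promoted_int_slot(const uint64_t *ap_cur) {
  uint64_t slot = *ap_cur;  /* load i64 from the current va_list slot */
  return (int)slot;         /* trunc i64 to i32 to recover the declared type */
}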