Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -11325,6 +11325,181 @@
 };
 } // end anonymous namespace
 
+//===----------------------------------------------------------------------===//
+// CSKY ABI Implementation
+//===----------------------------------------------------------------------===//
+namespace {
+/// ABI lowering for CSKY: classifies arguments and return values and lowers
+/// va_arg. FLen is the widest floating-point size (in bits) eligible for
+/// FPR passing; 0 disables FPR passing (soft-float).
+class CSKYABIInfo : public DefaultABIInfo {
+  static const int NumArgGPRs = 4;
+  static const int NumArgFPRs = 4;
+
+  static const unsigned XLen = 32;
+  unsigned FLen;
+
+public:
+  CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
+      : DefaultABIInfo(CGT), FLen(FLen) {}
+
+  void computeInfo(CGFunctionInfo &FI) const override;
+  ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
+                                  int &ArgFPRsLeft,
+                                  bool isReturnType = false) const;
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+};
+
+} // end anonymous namespace
+
+/// Classify the return value and every argument of \p FI.
+void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  QualType RetTy = FI.getReturnType();
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(RetTy);
+
+  bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
+
+  // Seed the register budgets that classifyArgumentType draws from. An
+  // indirect (sret) return takes one argument GPR for the result address,
+  // so the arguments get one fewer. FPRs are only budgeted when a
+  // hard-float ABI is selected (FLen != 0). Variadic arguments are not
+  // distinguished from named ones here.
+  int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
+  int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
+
+  for (auto &ArgInfo : FI.arguments()) {
+    ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft);
+  }
+}
+
+/// Lower va_arg for \p Ty using XLen/8-byte slots; an empty record yields the
+/// current va_list position without advancing it.
+Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                               QualType Ty) const {
+  CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
+
+  // Empty records are ignored for parameter passing purposes.
+  if (isEmptyRecord(getContext(), Ty, true)) {
+    Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
+                           getVAListElementType(CGF), SlotSize);
+    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+    return Addr;
+  }
+
+  auto TInfo = getContext().getTypeInfoInChars(Ty);
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize,
+                          /*AllowHigherAlign=*/true);
+}
+
+/// Classify \p Ty as an argument, or as a return value when \p isReturnType
+/// is set. \p ArgGPRsLeft and \p ArgFPRsLeft are the caller's remaining
+/// register budgets and are updated as this value claims registers.
+ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
+                                             int &ArgFPRsLeft,
+                                             bool isReturnType) const {
+  assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // Structures with either a non-trivial destructor or a non-trivial
+  // copy constructor are always passed indirectly.
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+    if (ArgGPRsLeft)
+      ArgGPRsLeft -= 1;
+    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
+                                           CGCXXABI::RAA_DirectInMemory);
+  }
+
+  // Ignore empty structs/unions.
+  if (isEmptyRecord(getContext(), Ty, true))
+    return ABIArgInfo::getIgnore();
+
+  if (!Ty->getAsUnionType())
+    if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+      return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+  uint64_t Size = getContext().getTypeSize(Ty);
+  // Pass floating point values via FPRs if possible.
+  if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size &&
+      ArgFPRsLeft) {
+    ArgFPRsLeft--;
+    return ABIArgInfo::getDirect();
+  }
+
+  // Complex types for the hard float ABI must be passed direct rather than
+  // using CoerceAndExpand.
+  if (Ty->isComplexType() && FLen && !isReturnType) {
+    QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
+    if (getContext().getTypeSize(EltTy) <= FLen) {
+      // Clamp at zero: a negative budget would read as "FPRs available" in
+      // the scalar floating-point check above.
+      ArgFPRsLeft = ArgFPRsLeft >= 2 ? ArgFPRsLeft - 2 : 0;
+      return ABIArgInfo::getDirect();
+    }
+  }
+
+  if (!isAggregateTypeForABI(Ty)) {
+    // Treat an enum type as its underlying type.
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    // All integral types are promoted to XLen width, unless passed on the
+    // stack.
+    if (Size < XLen && Ty->isIntegralOrEnumerationType())
+      return ABIArgInfo::getExtend(Ty);
+
+    if (const auto *EIT = Ty->getAs<BitIntType>()) {
+      if (EIT->getNumBits() < XLen)
+        return ABIArgInfo::getExtend(Ty);
+    }
+
+    return ABIArgInfo::getDirect();
+  }
+
+  // For an argument, the first 4*XLen bits of an aggregate are passed in
+  // registers and the rest on the stack, so we can coerce to integers
+  // directly and let the backend handle the split.
+  // For a return value, an aggregate of at most 2*XLen bits is returned in
+  // registers; anything larger is returned indirectly.
+  if (!isReturnType || Size <= 2 * XLen) {
+    if (Size <= XLen) {
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), XLen));
+    } else {
+      return ABIArgInfo::getDirect(llvm::ArrayType::get(
+          llvm::IntegerType::get(getVMContext(), XLen),
+          (Size + XLen - 1) / XLen));
+    }
+  }
+  return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
+
+/// Classify \p RetTy as a return value; a void return is ignored.
+ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const {
+  if (RetTy->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  int ArgGPRsLeft = 2;
+  int ArgFPRsLeft = FLen ? 1 : 0;
+
+  // Return values reuse the argument classifier; isReturnType = true selects
+  // the return-specific rules for aggregates and complex types.
+  return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true);
+}
+
+namespace {
+/// TargetCodeGenInfo for CSKY; owns a CSKYABIInfo built with the given FLen.
+class CSKYTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
+      : TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {}
+};
+} // end anonymous namespace
+
 //===----------------------------------------------------------------------===//
 // Driver code
 //===----------------------------------------------------------------------===//
@@ -11545,6 +11720,14 @@
     return SetCGInfo(new SPIRVTargetCodeGenInfo(Types));
   case llvm::Triple::ve:
     return SetCGInfo(new VETargetCodeGenInfo(Types));
+  case llvm::Triple::csky: {
+    bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi");
+    bool hasFP64 = getTarget().hasFeature("fpuv2_df") ||
+                   getTarget().hasFeature("fpuv3_df");
+    return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0
+                                                      : hasFP64   ? 64
+                                                                  : 32));
+  }
   }
 }
 
Index: clang/test/CodeGen/CSKY/csky-abi.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/CSKY/csky-abi.c
@@ -0,0 +1,344 @@
+// RUN: %clang_cc1 -no-opaque-pointers -triple csky -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -no-opaque-pointers -triple csky -target-feature +fpuv2_df -target-feature +fpuv2_sf \
+// RUN: -target-feature +hard-float -target-feature +hard-float-abi -emit-llvm %s -o - | FileCheck %s
+
+#include <stddef.h>
+#include <stdint.h>
+
+// CHECK-LABEL: define{{.*}} void @f_void()
+void f_void(void) {}
+
+// Scalar arguments and return values smaller than the word size are extended
+// according to the sign of their type, up to 32 bits
+
+// CHECK-LABEL: define{{.*}} zeroext i1 @f_scalar_0(i1 noundef zeroext %x)
+_Bool f_scalar_0(_Bool x) { return x; }
+
+// CHECK-LABEL: define{{.*}} signext i8 @f_scalar_1(i8 noundef signext %x)
+int8_t f_scalar_1(int8_t x) { return x; }
+
+// CHECK-LABEL: define{{.*}} zeroext i8 @f_scalar_2(i8 noundef zeroext %x)
+uint8_t f_scalar_2(uint8_t x) { return x; }
+// CHECK-LABEL: define{{.*}} i32 @f_scalar_3(i32 noundef %x) +int32_t f_scalar_3(int32_t x) { return x; } + +// CHECK-LABEL: define{{.*}} i64 @f_scalar_4(i64 noundef %x) +int64_t f_scalar_4(int64_t x) { return x; } + +// CHECK-LABEL: define{{.*}} float @f_fp_scalar_1(float noundef %x) +float f_fp_scalar_1(float x) { return x; } + +// CHECK-LABEL: define{{.*}} double @f_fp_scalar_2(double noundef %x) +double f_fp_scalar_2(double x) { return x; } + +// CHECK-LABEL: define{{.*}} double @f_fp_scalar_3(double noundef %x) +long double f_fp_scalar_3(long double x) { return x; } + +// Empty structs or unions are ignored. + +struct empty_s {}; + +// CHECK-LABEL: define{{.*}} void @f_agg_empty_struct() +struct empty_s f_agg_empty_struct(struct empty_s x) { + return x; +} + +union empty_u {}; + +// CHECK-LABEL: define{{.*}} void @f_agg_empty_union() +union empty_u f_agg_empty_union(union empty_u x) { + return x; +} + +// Aggregates <= 4*xlen may be passed in registers, so will be coerced to +// integer arguments. The rules for return are <= 2*xlen. 
+ +struct tiny { + uint8_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_tiny(i32 %x.coerce) +void f_agg_tiny(struct tiny x) { + x.a += x.b; + x.c += x.d; +} + +// CHECK-LABEL: define{{.*}} i32 @f_agg_tiny_ret() +struct tiny f_agg_tiny_ret(void) { + return (struct tiny){1, 2, 3, 4}; +} + +struct small { + int32_t a, *b; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_small([2 x i32] %x.coerce) +void f_agg_small(struct small x) { + x.a += *x.b; + x.b = &x.a; +} + +// CHECK-LABEL: define{{.*}} [2 x i32] @f_agg_small_ret() +struct small f_agg_small_ret(void) { + return (struct small){1, 0}; +} + +struct small_aligned { + int64_t a; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_small_aligned(i64 %x.coerce) +void f_agg_small_aligned(struct small_aligned x) { + x.a += x.a; +} + +// CHECK-LABEL: define{{.*}} i64 @f_agg_small_aligned_ret(i64 %x.coerce) +struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) { + return (struct small_aligned){10}; +} + +// For argument type, the first 4*XLen parts of aggregate will be passed +// in registers, and the rest will be passed in stack. +// So we can coerce to integers directly and let backend handle it correctly. +// For return type, aggregate which <= 2*XLen will be returned in registers. +// Otherwise, aggregate will be returned indirectly. 
+struct large { + int32_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_agg_large([4 x i32] %x.coerce) +void f_agg_large(struct large x) { + x.a = x.b + x.c + x.d; +} + +// The address where the struct should be written to will be the first +// argument +// CHECK-LABEL: define{{.*}} void @f_agg_large_ret(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 noundef %i, i8 noundef signext %j) +struct large f_agg_large_ret(int32_t i, int8_t j) { + return (struct large){1, 2, 3, 4}; +} + +typedef unsigned char v16i8 __attribute__((vector_size(16))); + +// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8(<16 x i8> noundef %x) +void f_vec_large_v16i8(v16i8 x) { + x[0] = x[7]; +} + +// CHECK-LABEL: define{{.*}} <16 x i8> @f_vec_large_v16i8_ret() +v16i8 f_vec_large_v16i8_ret(void) { + return (v16i8){1, 2, 3, 4, 5, 6, 7, 8}; +} + +// CHECK-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, [4 x i32] %d.coerce, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g, i8 noundef signext %h) +int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c, + struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) { + return g + h; +} + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. 
+ +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 noundef %a, i64 noundef %b, i64 noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return (struct large){a, e, f, g}; +} + +// CHECK-LABEL: define{{.*}} double @f_scalar_stack_4(i32 noundef %a, i64 noundef %b, i64 noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +long double f_scalar_stack_4(int32_t a, int64_t b, int64_t c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return d; +} + +// Aggregates should be coerced to an integer array. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_5(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 noundef %e, i64 noundef %f, float noundef %g, double noundef %h, double noundef %i) +void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e, + int64_t f, float g, double h, long double i) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, [4 x i32] %h.coerce) +void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, + struct small f, struct small_aligned g, struct large h) {} + +// Ensure that ABI lowering happens as expected for vararg calls. For CSKY +// with the base integer calling convention there will be no observable +// differences in the lowered IR for a call with varargs vs without. + +int f_va_callee(int, ...); + +// CHECK-LABEL: define{{.*}} void @f_va_caller() +// CHECK: call i32 (i32, ...) 
@f_va_callee(i32 noundef 1, i32 noundef 2, i64 noundef 3, double noundef 4.000000e+00, double noundef 5.000000e+00, i32 {{%.*}}, [2 x i32] {{%.*}}, i64 {{%.*}}, [4 x i32] {{%.*}}) +void f_va_caller(void) { + f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct tiny){6, 7, 8, 9}, + (struct small){10, NULL}, (struct small_aligned){11}, + (struct large){12, 13, 14, 15}); +} + +// CHECK-LABEL: define{{.*}} i32 @f_va_1(i8* noundef %fmt, ...) {{.*}} { +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK: [[VA:%.*]] = alloca i8*, align 4 +// CHECK: [[V:%.*]] = alloca i32, align 4 +// CHECK: store i8* %fmt, i8** [[FMT_ADDR]], align 4 +// CHECK: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK: call void @llvm.va_start(i8* [[VA1]]) +// CHECK: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4 +// CHECK: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32* +// CHECK: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK: store i32 [[TMP1]], i32* [[V]], align 4 +// CHECK: [[VA2:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK: call void @llvm.va_end(i8* [[VA2]]) +// CHECK: [[TMP2:%.*]] = load i32, i32* [[V]], align 4 +// CHECK: ret i32 [[TMP2]] +// CHECK: } +int f_va_1(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + __builtin_va_end(va); + + return v; +} + +// CHECK-LABEL: @f_va_2( +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca double, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 4 +// CHECK-NEXT: [[VA2:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA2]]) +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[V]], align 4 +// CHECK-NEXT: ret double [[TMP5]] +double f_va_2(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + double v = __builtin_va_arg(va, double); + __builtin_va_end(va); + + return v; +} + +// CHECK-LABEL: @f_va_3( +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca double, align 4 +// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca double, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 4 +// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR2]] to i32* +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[W]], align 4 +// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARGP_CUR4]] to double* +// CHECK-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 4 +// CHECK-NEXT: store double [[TMP11]], double* [[X]], align 4 +// CHECK-NEXT: [[VA6:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA6]]) +// CHECK-NEXT: [[TMP12:%.*]] = load double, double* 
[[V]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK-NEXT: ret double [[ADD]] +double f_va_3(char *fmt, ...) { + __builtin_va_list va; + + __builtin_va_start(va, fmt); + double v = __builtin_va_arg(va, double); + int w = __builtin_va_arg(va, int); + double x = __builtin_va_arg(va, double); + __builtin_va_end(va); + + return v + x; +} + +// CHECK-LABEL: define{{.*}} i32 @f_va_4(i8* noundef %fmt, ...) {{.*}} { +// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LD:%.*]] = alloca double, align 4 +// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4 +// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], i32* [[V]], align 4 +// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARGP_CUR2]] to double* +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP2]], align 4 +// CHECK-NEXT: store double [[TMP4]], double* [[LD]], align 4 +// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load 
i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 4 +// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR4]] to %struct.tiny* +// CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.tiny* [[TS]] to i8* +// CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.tiny* [[TMP5]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[TMP6]], i8* align 4 [[TMP7]], i32 4, i1 false) +// CHECK-NEXT: [[ARGP_CUR6:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT7:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR6]], i32 8 +// CHECK-NEXT: store i8* [[ARGP_NEXT7]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ARGP_CUR6]] to %struct.small* +// CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.small* [[SS]] to i8* +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.small* [[TMP8]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load i8*, i8** [[VA]], align 4 +// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR8]], i32 16 +// CHECK-NEXT: store i8* [[ARGP_NEXT9]], i8** [[VA]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[ARGP_CUR8]] to %struct.large* +// CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.large* [[LS]] to i8* +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.large* [[TMP11]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 16, i1 false) +// CHECK-NEXT: [[VA10:%.*]] = bitcast i8** [[VA]] to i8* +// CHECK-NEXT: call void @llvm.va_end(i8* [[VA10]]) +int f_va_4(char *fmt, ...) 
{ + __builtin_va_list va; + + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + long double ld = __builtin_va_arg(va, long double); + struct tiny ts = __builtin_va_arg(va, struct tiny); + struct small ss = __builtin_va_arg(va, struct small); + struct large ls = __builtin_va_arg(va, struct large); + __builtin_va_end(va); + + int ret = (int)((long double)v + ld); + ret = ret + ts.a + ts.b + ts.c + ts.d; + ret = ret + ss.a + (int)ss.b; + ret = ret + ls.a + ls.b + ls.c + ls.d; + + return ret; +} Index: clang/test/CodeGen/CSKY/csky-hard-abi.c =================================================================== --- /dev/null +++ clang/test/CodeGen/CSKY/csky-hard-abi.c @@ -0,0 +1,394 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple csky -target-feature +fpuv2_sf -target-feature +fpuv2_df -target-feature +hard-float-abi -target-feature +hard-float -emit-llvm %s -o - | FileCheck %s + +#include + +// Verify that the tracking of used GPRs and FPRs works correctly by checking +// that small integers are sign/zero extended when passed in registers. + +// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended +// because it will be passed in a GPR. + +// CHECK: define{{.*}} void @f_fpr_tracking(double noundef %a, double noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %i) +void f_fpr_tracking(double a, double b, double c, double d, uint8_t i) {} + +// A struct containing just one floating-point real is passed as though it +// were a standalone floating-point real. +struct double_s { + double f; +}; + +// CHECK: define{{.*}} void @f_double_s_arg(double %a.coerce) +void f_double_s_arg(struct double_s a) {} + +// CHECK: define{{.*}} double @f_ret_double_s() +struct double_s f_ret_double_s(void) { + return (struct double_s){1.0}; +} + +// A struct containing a double and any number of zero-width bitfields is +// passed as though it were a standalone floating-point real. 
+ +struct zbf_double_s { + int : 0; + double f; +}; +struct zbf_double_zbf_s { + int : 0; + double f; + int : 0; +}; + +// CHECK: define{{.*}} void @f_zbf_double_s_arg(double %a.coerce) +void f_zbf_double_s_arg(struct zbf_double_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_s() +struct zbf_double_s f_ret_zbf_double_s(void) { + return (struct zbf_double_s){1.0}; +} + +// CHECK: define{{.*}} void @f_zbf_double_zbf_s_arg(double %a.coerce) +void f_zbf_double_zbf_s_arg(struct zbf_double_zbf_s a) {} + +// CHECK: define{{.*}} double @f_ret_zbf_double_zbf_s() +struct zbf_double_zbf_s f_ret_zbf_double_zbf_s(void) { + return (struct zbf_double_zbf_s){1.0}; +} + +// For argument type, the first 4*XLen parts of aggregate will be passed +// in registers, and the rest will be passed in stack. +// So we can coerce to integers directly and let backend handle it correctly. +// For return type, aggregate which <= 2*XLen will be returned in registers. +// Otherwise, aggregate will be returned indirectly. 
+ +struct double_double_s { + double f; + double g; +}; +struct double_float_s { + double f; + float g; +}; + +// CHECK: define{{.*}} void @f_double_double_s_arg([4 x i32] %a.coerce) +void f_double_double_s_arg(struct double_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_double_s(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result) +struct double_double_s f_ret_double_double_s(void) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_float_s_arg([3 x i32] %a.coerce) +void f_double_float_s_arg(struct double_float_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_float_s(%struct.double_float_s* noalias sret(%struct.double_float_s) align 4 %agg.result) +struct double_float_s f_ret_double_float_s(void) { + return (struct double_float_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_double_double_s_arg_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %i, [4 x i32] %h.coerce) +void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double i, struct double_double_s h) {} + +struct double_int8_s { + double f; + int8_t i; +}; +struct double_uint8_s { + double f; + uint8_t i; +}; +struct double_int32_s { + double f; + int32_t i; +}; +struct double_int64_s { + double f; + int64_t i; +}; +struct double_int64bf_s { + double f; + int64_t i : 32; +}; +struct double_int8_zbf_s { + double f; + int8_t i; + int : 0; +}; + +// CHECK: define{{.*}} @f_double_int8_s_arg([3 x i32] %a.coerce) +void f_double_int8_s_arg(struct double_int8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_s(%struct.double_int8_s* noalias sret(%struct.double_int8_s) align 4 %agg.result) +struct double_int8_s f_ret_double_int8_s(void) { + return (struct double_int8_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_uint8_s_arg([3 x i32] 
%a.coerce) +void f_double_uint8_s_arg(struct double_uint8_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_uint8_s(%struct.double_uint8_s* noalias sret(%struct.double_uint8_s) align 4 %agg.result) +struct double_uint8_s f_ret_double_uint8_s(void) { + return (struct double_uint8_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int32_s_arg([3 x i32] %a.coerce) +void f_double_int32_s_arg(struct double_int32_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int32_s(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result) +struct double_int32_s f_ret_double_int32_s(void) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64_s_arg([4 x i32] %a.coerce) +void f_double_int64_s_arg(struct double_int64_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret(%struct.double_int64_s) align 4 %agg.result) +struct double_int64_s f_ret_double_int64_s(void) { + return (struct double_int64_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int64bf_s_arg([3 x i32] %a.coerce) +void f_double_int64bf_s_arg(struct double_int64bf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int64bf_s(%struct.double_int64bf_s* noalias sret(%struct.double_int64bf_s) align 4 %agg.result) +struct double_int64bf_s f_ret_double_int64bf_s(void) { + return (struct double_int64bf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_zbf_s([3 x i32] %a.coerce) +void f_double_int8_zbf_s(struct double_int8_zbf_s a) {} + +// CHECK: define{{.*}} void @f_ret_double_int8_zbf_s(%struct.double_int8_zbf_s* noalias sret(%struct.double_int8_zbf_s) align 4 %agg.result) +struct double_int8_zbf_s f_ret_double_int8_zbf_s(void) { + return (struct double_int8_zbf_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_double_int8_s_arg_insufficient_gprs(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, i32 noundef %h, [3 x i32] %i.coerce) 
+void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e, + int f, int g, int h, struct double_int8_s i) {} + +// CHECK: define{{.*}} void @f_struct_double_int8_insufficient_fprs(float noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double noundef %f, double noundef %g, double noundef %h, [3 x i32] %i.coerce) +void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d, + double e, double f, double g, double h, struct double_int8_s i) {} + +// Complex floating-point values are special in passing argument, +// and it's not same as structs containing a single complex. +// Complex floating-point value should be passed in two consecutive fprs. +// But the return process is same as struct. + +// CHECK: define{{.*}} void @f_doublecomplex(double noundef %a.coerce0, double noundef %a.coerce1) +void f_doublecomplex(double __complex__ a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex({ double, double }* noalias sret({ double, double }) align 4 %agg.result) +double __complex__ f_ret_doublecomplex(void) { + return 1.0; +} + +struct doublecomplex_s { + double __complex__ c; +}; + +// CHECK: define{{.*}} void @f_doublecomplex_s_arg([4 x i32] %a.coerce) +void f_doublecomplex_s_arg(struct doublecomplex_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_s(%struct.doublecomplex_s* noalias sret(%struct.doublecomplex_s) align 4 %agg.result) +struct doublecomplex_s f_ret_doublecomplex_s(void) { + return (struct doublecomplex_s){1.0}; +} + +// Test single or two-element structs that need flattening. e.g. those +// containing nested structs, doubles in small arrays, zero-length structs etc. 
+ +struct doublearr1_s { + double a[1]; +}; + +// CHECK: define{{.*}} void @f_doublearr1_s_arg(double %a.coerce) +void f_doublearr1_s_arg(struct doublearr1_s a) {} + +// CHECK: define{{.*}} double @f_ret_doublearr1_s() +struct doublearr1_s f_ret_doublearr1_s(void) { + return (struct doublearr1_s){{1.0}}; +} + +struct doublearr2_s { + double a[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_s_arg(struct doublearr2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_s(%struct.doublearr2_s* noalias sret(%struct.doublearr2_s) align 4 %agg.result) +struct doublearr2_s f_ret_doublearr2_s(void) { + return (struct doublearr2_s){{1.0, 2.0}}; +} + +struct doublearr2_tricky1_s { + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky1_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky1_s_arg(struct doublearr2_tricky1_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky1_s(%struct.doublearr2_tricky1_s* noalias sret(%struct.doublearr2_tricky1_s) align 4 %agg.result) +struct doublearr2_tricky1_s f_ret_doublearr2_tricky1_s(void) { + return (struct doublearr2_tricky1_s){{{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky2_s { + struct {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky2_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky2_s_arg(struct doublearr2_tricky2_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky2_s(%struct.doublearr2_tricky2_s* noalias sret(%struct.doublearr2_tricky2_s) align 4 %agg.result) +struct doublearr2_tricky2_s f_ret_doublearr2_tricky2_s(void) { + return (struct doublearr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky3_s { + union {}; + struct { + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky3_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky3_s_arg(struct doublearr2_tricky3_s a) {} + +// CHECK: define{{.*}} void 
@f_ret_doublearr2_tricky3_s(%struct.doublearr2_tricky3_s* noalias sret(%struct.doublearr2_tricky3_s) align 4 %agg.result) +struct doublearr2_tricky3_s f_ret_doublearr2_tricky3_s(void) { + return (struct doublearr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}}; +} + +struct doublearr2_tricky4_s { + union {}; + struct { + struct {}; + double f[1]; + } g[2]; +}; + +// CHECK: define{{.*}} void @f_doublearr2_tricky4_s_arg([4 x i32] %a.coerce) +void f_doublearr2_tricky4_s_arg(struct doublearr2_tricky4_s a) {} + +// CHECK: define{{.*}} void @f_ret_doublearr2_tricky4_s(%struct.doublearr2_tricky4_s* noalias sret(%struct.doublearr2_tricky4_s) align 4 %agg.result) +struct doublearr2_tricky4_s f_ret_doublearr2_tricky4_s(void) { + return (struct doublearr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}}; +} + +struct int_double_int_s { + int a; + double b; + int c; +}; + +// CHECK: define{{.*}} void @f_int_double_int_s_arg([4 x i32] %a.coerce) +void f_int_double_int_s_arg(struct int_double_int_s a) {} + +// CHECK: define{{.*}} void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret(%struct.int_double_int_s) align 4 %agg.result) +struct int_double_int_s f_ret_int_double_int_s(void) { + return (struct int_double_int_s){1, 2.0, 3}; +} + +struct int64_double_s { + int64_t a; + double b; +}; + +// CHECK: define{{.*}} void @f_int64_double_s_arg([4 x i32] %a.coerce) +void f_int64_double_s_arg(struct int64_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret(%struct.int64_double_s) align 4 %agg.result) +struct int64_double_s f_ret_int64_double_s(void) { + return (struct int64_double_s){1, 2.0}; +} + +struct char_char_double_s { + char a; + char b; + double c; +}; + +// CHECK-LABEL: define{{.*}} void @f_char_char_double_s_arg([3 x i32] %a.coerce) +void f_char_char_double_s_arg(struct char_char_double_s a) {} + +// CHECK: define{{.*}} void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret(%struct.char_char_double_s) align 4 
%agg.result) +struct char_char_double_s f_ret_char_char_double_s(void) { + return (struct char_char_double_s){1, 2, 3.0}; +} + +// A union containing just one floating-point real cannot be passed as though it +// were a standalone floating-point real. +union double_u { + double a; +}; + +// CHECK: define{{.*}} void @f_double_u_arg([2 x i32] %a.coerce) +void f_double_u_arg(union double_u a) {} + +// CHECK: define{{.*}} [2 x i32] @f_ret_double_u() +union double_u f_ret_double_u(void) { + return (union double_u){1.0}; +} + +// CHECK: define{{.*}} void @f_ret_double_int32_s_double_int32_s_just_sufficient_gprs(%struct.double_int32_s* noalias sret(%struct.double_int32_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_int32_s f_ret_double_int32_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_int32_s){1.0, 2}; +} + +// CHECK: define{{.*}} void @f_ret_double_double_s_double_int32_s_just_sufficient_gprs(%struct.double_double_s* noalias sret(%struct.double_double_s) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +struct double_double_s f_ret_double_double_s_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { + return (struct double_double_s){1.0, 2.0}; +} + +// CHECK: define{{.*}} void @f_ret_doublecomplex_double_int32_s_just_sufficient_gprs({ double, double }* noalias sret({ double, double }) align 4 %agg.result, i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e, i32 noundef %f, i32 noundef %g, [3 x i32] %h.coerce) +double __complex__ f_ret_doublecomplex_double_int32_s_just_sufficient_gprs( + int a, int b, int c, int d, int e, int f, int g, struct double_int32_s h) { 
+ return 1.0; +} + +struct tiny { + uint8_t a, b, c, d; +}; + +struct small { + int32_t a, *b; +}; + +struct small_aligned { + int64_t a; +}; + +struct large { + int32_t a, b, c, d; +}; + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, float noundef %a, i64 noundef %b, double noundef %c, double noundef %d, i8 noundef zeroext %e, i8 noundef signext %f, i8 noundef zeroext %g) +struct large f_scalar_stack_2(float a, int64_t b, double c, long double d, + uint8_t e, int8_t f, uint8_t g) { + return (struct large){a, e, f, g}; +} + +// Aggregates and >=XLen scalars passed on the stack should be lowered just as +// they would be if passed via registers. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_3(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 noundef %e, i64 noundef %f, float noundef %g, double noundef %h, double noundef %i) +void f_scalar_stack_3(double a, int64_t b, double c, int64_t d, int e, + int64_t f, float g, double h, long double i) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double noundef %a, i64 noundef %b, double noundef %c, i64 noundef %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, [4 x i32] %h.coerce) +void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, + struct small f, struct small_aligned g, struct large h) {}