diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -108,8 +108,15 @@ } bool setABI(const std::string &Name) override { + if (Name == "ilp32e") { + ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S32"); + return true; + } + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128"); return true; } return false; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -181,6 +181,9 @@ if (ISAInfo->hasExtension("v")) Builder.defineMacro("__riscv_vector"); + + if (ISAInfo->hasExtension("e")) + Builder.defineMacro("__riscv_32e"); } const Builtin::Info RISCVTargetInfo::BuiltinInfo[] = { @@ -249,6 +252,11 @@ if (ABI.empty()) ABI = llvm::RISCV::computeDefaultABIFromArch(*ISAInfo).str(); + if (ABI == "ilp32e" && ISAInfo->hasExtension("d")) { + Diags.Report(diag::err_invalid_feature_combination) + << "ILP32E must not be used with the D ISA extension"; + return false; + } return true; } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -10672,8 +10672,8 @@ // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target // with soft float ABI has FLen==0). 
unsigned FLen; - static const int NumArgGPRs = 8; - static const int NumArgFPRs = 8; + const int NumArgGPRs; + const int NumArgFPRs; bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, @@ -10681,8 +10681,10 @@ CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, + bool EABI) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen), NumArgGPRs(EABI ? 6 : 8), + NumArgFPRs(FLen != 0 ? 8 : 0) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. @@ -10736,7 +10738,7 @@ // different for variadic arguments, we must also track whether we are // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? NumArgFPRs : 0; + int ArgFPRsLeft = NumArgFPRs; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; @@ -11105,8 +11107,9 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, - unsigned FLen) - : TargetCodeGenInfo(std::make_unique(CGT, XLen, FLen)) {} + unsigned FLen, bool EABI) + : TargetCodeGenInfo( + std::make_unique(CGT, XLen, FLen, EABI)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -11329,7 +11332,8 @@ ABIFLen = 32; else if (ABIStr.endswith("d")) ABIFLen = 64; - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); + bool EABI = ABIStr.endswith("e"); + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen, EABI)); } case llvm::Triple::systemz: { diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c rename from 
clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c rename to clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c --- a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c @@ -1,235 +1,33 @@ // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple riscv32 -emit-llvm -fforce-enable-int128 %s -o - \ -// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \ // RUN: | FileCheck %s // RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-feature +f -target-abi ilp32d -emit-llvm %s -o - \ // RUN: | FileCheck %s +// RUN: %clang_cc1 -triple riscv32 -target-abi ilp32e -emit-llvm %s -o - \ +// RUN: | FileCheck %s // This file contains test cases that will have the same output for the ilp32, -// ilp32f, and ilp32d ABIs. +// ilp32e, ilp32f, and ilp32d ABIs. #include #include -// CHECK-LABEL: define{{.*}} void @f_void() -void f_void(void) {} - -// Scalar arguments and return values smaller than the word size are extended -// according to the sign of their type, up to 32 bits - -// CHECK-LABEL: define{{.*}} zeroext i1 @f_scalar_0(i1 zeroext %x) -_Bool f_scalar_0(_Bool x) { return x; } - -// CHECK-LABEL: define{{.*}} signext i8 @f_scalar_1(i8 signext %x) -int8_t f_scalar_1(int8_t x) { return x; } - -// CHECK-LABEL: define{{.*}} zeroext i8 @f_scalar_2(i8 zeroext %x) -uint8_t f_scalar_2(uint8_t x) { return x; } - -// CHECK-LABEL: define{{.*}} i32 @f_scalar_3(i32 %x) -int32_t f_scalar_3(int32_t x) { return x; } - -// CHECK-LABEL: define{{.*}} i64 @f_scalar_4(i64 %x) -int64_t f_scalar_4(int64_t x) { return x; } - -#ifdef __SIZEOF_INT128__ -// CHECK-FORCEINT128-LABEL: define{{.*}} i128 @f_scalar_5(i128 %x) -__int128_t f_scalar_5(__int128_t x) { return x; } -#endif - -// CHECK-LABEL: define{{.*}} float @f_fp_scalar_1(float %x) -float 
f_fp_scalar_1(float x) { return x; } - -// CHECK-LABEL: define{{.*}} double @f_fp_scalar_2(double %x) -double f_fp_scalar_2(double x) { return x; } - -// Scalars larger than 2*xlen are passed/returned indirect. However, the -// RISC-V LLVM backend can handle this fine, so the function doesn't need to -// be modified. - -// CHECK-LABEL: define{{.*}} fp128 @f_fp_scalar_3(fp128 %x) -long double f_fp_scalar_3(long double x) { return x; } - -// Empty structs or unions are ignored. - -struct empty_s {}; - -// CHECK-LABEL: define{{.*}} void @f_agg_empty_struct() -struct empty_s f_agg_empty_struct(struct empty_s x) { - return x; -} - -union empty_u {}; - -// CHECK-LABEL: define{{.*}} void @f_agg_empty_union() -union empty_u f_agg_empty_union(union empty_u x) { - return x; -} - -// Aggregates <= 2*xlen may be passed in registers, so will be coerced to -// integer arguments. The rules for return are the same. - struct tiny { uint8_t a, b, c, d; }; -// CHECK-LABEL: define{{.*}} void @f_agg_tiny(i32 %x.coerce) -void f_agg_tiny(struct tiny x) { - x.a += x.b; - x.c += x.d; -} - -// CHECK-LABEL: define{{.*}} i32 @f_agg_tiny_ret() -struct tiny f_agg_tiny_ret() { - return (struct tiny){1, 2, 3, 4}; -} - -typedef uint8_t v4i8 __attribute__((vector_size(4))); -typedef int32_t v1i32 __attribute__((vector_size(4))); - -// CHECK-LABEL: define{{.*}} void @f_vec_tiny_v4i8(i32 %x.coerce) -void f_vec_tiny_v4i8(v4i8 x) { - x[0] = x[1]; - x[2] = x[3]; -} - -// CHECK-LABEL: define{{.*}} i32 @f_vec_tiny_v4i8_ret() -v4i8 f_vec_tiny_v4i8_ret() { - return (v4i8){1, 2, 3, 4}; -} - -// CHECK-LABEL: define{{.*}} void @f_vec_tiny_v1i32(i32 %x.coerce) -void f_vec_tiny_v1i32(v1i32 x) { - x[0] = 114; -} - -// CHECK-LABEL: define{{.*}} i32 @f_vec_tiny_v1i32_ret() -v1i32 f_vec_tiny_v1i32_ret() { - return (v1i32){1}; -} - struct small { int32_t a, *b; }; -// CHECK-LABEL: define{{.*}} void @f_agg_small([2 x i32] %x.coerce) -void f_agg_small(struct small x) { - x.a += *x.b; - x.b = &x.a; -} - -// CHECK-LABEL: 
define{{.*}} [2 x i32] @f_agg_small_ret() -struct small f_agg_small_ret() { - return (struct small){1, 0}; -} - -typedef uint8_t v8i8 __attribute__((vector_size(8))); -typedef int64_t v1i64 __attribute__((vector_size(8))); - -// CHECK-LABEL: define{{.*}} void @f_vec_small_v8i8(i64 %x.coerce) -void f_vec_small_v8i8(v8i8 x) { - x[0] = x[7]; -} - -// CHECK-LABEL: define{{.*}} i64 @f_vec_small_v8i8_ret() -v8i8 f_vec_small_v8i8_ret() { - return (v8i8){1, 2, 3, 4, 5, 6, 7, 8}; -} - -// CHECK-LABEL: define{{.*}} void @f_vec_small_v1i64(i64 %x.coerce) -void f_vec_small_v1i64(v1i64 x) { - x[0] = 114; -} - -// CHECK-LABEL: define{{.*}} i64 @f_vec_small_v1i64_ret() -v1i64 f_vec_small_v1i64_ret() { - return (v1i64){1}; -} - -// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a -// single 2*xlen-sized argument, to ensure that alignment can be maintained if -// passed on the stack. - struct small_aligned { int64_t a; }; -// CHECK-LABEL: define{{.*}} void @f_agg_small_aligned(i64 %x.coerce) -void f_agg_small_aligned(struct small_aligned x) { - x.a += x.a; -} - -// CHECK-LABEL: define{{.*}} i64 @f_agg_small_aligned_ret(i64 %x.coerce) -struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) { - return (struct small_aligned){10}; -} - -// Aggregates greater > 2*xlen will be passed and returned indirectly struct large { int32_t a, b, c, d; }; -// CHECK-LABEL: define{{.*}} void @f_agg_large(%struct.large* %x) -void f_agg_large(struct large x) { - x.a = x.b + x.c + x.d; -} - -// The address where the struct should be written to will be the first -// argument -// CHECK-LABEL: define{{.*}} void @f_agg_large_ret(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %i, i8 signext %j) -struct large f_agg_large_ret(int32_t i, int8_t j) { - return (struct large){1, 2, 3, 4}; -} - -typedef unsigned char v16i8 __attribute__((vector_size(16))); - -// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8(<16 x i8>* %0) -void f_vec_large_v16i8(v16i8 x) { 
- x[0] = x[7]; -} - -// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret(<16 x i8>) align 16 %agg.result) -v16i8 f_vec_large_v16i8_ret() { - return (v16i8){1, 2, 3, 4, 5, 6, 7, 8}; -} - -// Scalars passed on the stack should not have signext/zeroext attributes -// (they are anyext). - -// CHECK-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h) -int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c, - struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) { - return g + h; -} - -// Ensure that scalars passed on the stack are still determined correctly in -// the presence of large return values that consume a register due to the need -// to pass a pointer. - -// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g) -struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d, - uint8_t e, int8_t f, uint8_t g) { - return (struct large){a, e, f, g}; -} - -// CHECK-LABEL: define{{.*}} fp128 @f_scalar_stack_4(i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g) -long double f_scalar_stack_4(int32_t a, int64_t b, int64_t c, long double d, - uint8_t e, int8_t f, uint8_t g) { - return d; -} - -// Aggregates and >=XLen scalars passed on the stack should be lowered just as -// they would be if passed via registers. 
- -// CHECK-LABEL: define{{.*}} void @f_scalar_stack_5(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, float %g, double %h, fp128 %i) -void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e, - int64_t f, float g, double h, long double i) {} - -// CHECK-LABEL: define{{.*}} void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h) -void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, - struct small f, struct small_aligned g, struct large h) {} - // Ensure that ABI lowering happens as expected for vararg calls. For RV32 // with the base integer calling convention there will be no observable // differences in the lowered IR for a call with varargs vs without. diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c b/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e %s -o - \ +// RUN: | FileCheck -check-prefix=ILP32E %s +// RUN: not %clang_cc1 -triple riscv32 -target-feature +d -target-feature +f -emit-llvm -target-abi ilp32e %s 2>&1 \ +// RUN: | FileCheck -check-prefix=ILP32E-WITH-FD %s + +// This file contains test cases for only ilp32e. + +// ILP32E-WITH-FD: error: invalid feature combination: ILP32E must not be used with the D ISA extension + +#include +#include + +struct tiny { + uint8_t a, b, c, d; +}; + +struct small { + int32_t a, *b; +}; + +struct small_aligned { + int64_t a; +}; + +struct large { + int32_t a, b, c, d; +}; + +// Scalars passed on the stack should not have signext/zeroext attributes +// (they are anyext). 
+ +// ILP32E-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, %struct.large* %c, i8 zeroext %d, i8 signext %e, i8 %f, i8 %g) +int f_scalar_stack_1(struct tiny a, struct small b, struct large c, + uint8_t d, int8_t e, uint8_t f, int8_t g) { + return f + g; +} + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. + +// ILP32E-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %a, i64 %b, fp128 %c, i8 zeroext %d, i8 %e, i8 %f) +struct large f_scalar_stack_2(int32_t a, int64_t b, long double c, + uint8_t d, int8_t e, uint8_t f) { + return (struct large){a, d, e, f}; +} + +// ILP32E-LABEL: define{{.*}} fp128 @f_scalar_stack_4(i32 %a, i64 %b, fp128 %c, i8 zeroext %d, i8 %e, i8 %f) +long double f_scalar_stack_4(int32_t a, int64_t b, long double c, + uint8_t d, int8_t e, uint8_t f) { + return c; +} + +// Aggregates and >=XLen scalars passed on the stack should be lowered just as +// they would be if passed via registers. 
+ +// ILP32E-LABEL: define{{.*}} void @f_scalar_stack_5(double %a, i64 %b, double %c, i32 %d, i64 %e, float %f, double %g, fp128 %h) +void f_scalar_stack_5(double a, int64_t b, double c, int d, + int64_t e, float f, double g, long double h) {} + +// ILP32E-LABEL: define{{.*}} void @f_agg_stack(double %a, i64 %b, double %c, i32 %d.coerce, [2 x i32] %e.coerce, i64 %f.coerce, %struct.large* %g) +void f_agg_stack(double a, int64_t b, double c, struct tiny d, + struct small e, struct small_aligned f, struct large g) {} diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -3,6 +3,7 @@ // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64i -x c -E -dM %s \ // RUN: -o - | FileCheck %s +// CHECK-NOT: __riscv_32e // CHECK-NOT: __riscv_div // CHECK-NOT: __riscv_m // CHECK-NOT: __riscv_mul @@ -34,6 +35,15 @@ // CHECK-NOT: __riscv_zvamo // CHECK-NOT: __riscv_zvlsseg +// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-E-EXT %s +// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv32i -mabi=ilp32e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ILP32E %s +// CHECK-E-EXT: __riscv_32e 1 +// CHECK-E-EXT: __riscv_abi_rve 1 +// CHECK-E-EXT: __riscv_e 1009000 +// CHECK-ILP32E: __riscv_abi_rve 1 + // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32im -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64im -x c -E -dM %s \ diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -333,10 +333,10 @@ StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo) { if (ISAInfo.getXLen() == 32) { - if 
(ISAInfo.hasExtension("d")) - return "ilp32d"; if (ISAInfo.hasExtension("e")) return "ilp32e"; + if (ISAInfo.hasExtension("d")) + return "ilp32d"; return "ilp32"; } else if (ISAInfo.getXLen() == 64) { if (ISAInfo.hasExtension("d")) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -36,7 +36,8 @@ TargetOptions(Options) { TargetABI = RISCVABI::computeTargetABI( STI.getTargetTriple(), STI.getFeatureBits(), Options.getABIName()); - RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits()); + RISCVFeatures::validate(STI.getTargetTriple(), TargetABI, + STI.getFeatureBits()); } ~RISCVAsmBackend() override {} diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -341,7 +341,8 @@ // Validates if the given combination of features are valid for the target // triple. Exits with report_fatal_error if not. -void validate(const Triple &TT, const FeatureBitset &FeatureBits); +void validate(const Triple &TT, const RISCVABI::ABI TargetABI, + const FeatureBitset &FeatureBits); // Convert FeatureBitset to FeatureVector. 
void toFeatureVector(std::vector &FeatureVector, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -97,13 +97,17 @@ namespace RISCVFeatures { -void validate(const Triple &TT, const FeatureBitset &FeatureBits) { +void validate(const Triple &TT, const RISCVABI::ABI TargetABI, + const FeatureBitset &FeatureBits) { if (TT.isArch64Bit() && !FeatureBits[RISCV::Feature64Bit]) report_fatal_error("RV64 target requires an RV64 CPU"); if (!TT.isArch64Bit() && FeatureBits[RISCV::Feature64Bit]) report_fatal_error("RV32 target requires an RV32 CPU"); if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E]) report_fatal_error("RV32E can't be enabled for an RV64 target"); + if (TargetABI == RISCVABI::ABI::ABI_ILP32E && + FeatureBits[RISCV::FeatureStdExtD]) + report_fatal_error("ILP32E must not be used with the D ISA extension"); } void toFeatureVector(std::vector &FeatureVector, diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -13,8 +13,10 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). +def CSR_ILP32E : CalleeSavedRegs<(add X1, X3, X4, X8, X9)>; + def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add CSR_ILP32E, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -27,8 +29,13 @@ // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; -// Interrupt handler needs to save/restore all registers that are used, -// both Caller and Callee saved registers. 
+// Interrupt service routines need to save/restore all physical registers that +// are used, both Caller and Callee saved registers. +// +// The only physical register that shouldn't be saved is x2 (sp), which is +// managed by the prolog/epilog inserter, even for interrupt service routines. + +// All 32-bit GP registers, excluding x0 (zero) and x2 (sp). def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 9), (sequence "X%u", 10, 11), diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -21,11 +21,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/Align(16), - /*LocalAreaOffset=*/0), - STI(STI) {} + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -23,6 +23,19 @@ using namespace llvm; +static Align getABIStackAlignment(RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return Align(4); + + return Align(16); +} + +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + getABIStackAlignment(STI.getTargetABI()), + /*LocalAreaOffset=*/0), + STI(STI) {} + // For now we use x18, a.k.a s2, as pointer to shadow call stack. // User should explicitly set -ffixed-x18 and not use x18 in their asm. 
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, @@ -225,12 +238,27 @@ // disabled, if it needs dynamic stack realignment, if the function has // variable sized allocas, or if the frame address is taken. bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MF.getTarget().Options.DisableFramePointerElim(MF) || - RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || - MFI.isFrameAddressTaken(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + // If eliminating the frame pointer is disabled, then we have a frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) + return true; + + // If we take the address of the frame, then we need to store that address + // somewhere. + if (MFI.isFrameAddressTaken()) + return true; + + // We need to save the frame pointer to realign the stack. + if (TRI->hasStackRealignment(MF)) + return true; + + // We need to save the frame pointer to allocate a variable-sized object. 
+ if (MFI.hasVarSizedObjects()) + return true; + + return false; } bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -48,9 +48,6 @@ const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRV32E()) - report_fatal_error("Codegen not yet implemented for RV32E"); - RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); @@ -72,6 +69,7 @@ default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -8074,10 +8072,15 @@ // register-size fields in the same situations they would be for fixed // arguments. -static const MCPhysReg ArgGPRs[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, - RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 -}; +// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except +// the ILP32E ABI. +static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; +// The GPRs used for passing arguments in the ILP32E ABI. +static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; +// The FPRs used for passing arguments in the ILP32F and LP64F ABIs. static const MCPhysReg ArgFPR16s[] = { RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H @@ -8086,6 +8089,7 @@ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; +// The FPRs used for passing arguments in the ILP32D and LP64D ABIs. 
static const MCPhysReg ArgFPR64s[] = { RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D @@ -8102,6 +8106,20 @@ RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; +static ArrayRef getCallingConvArgGPRs(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return makeArrayRef(ArgEGPRs); + + return makeArrayRef(ArgIGPRs); +} + +static Register getCallingConvLastArgGPR(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return RISCV::X15; + + return RISCV::X17; +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, @@ -8109,6 +8127,10 @@ MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget(); + ArrayRef ArgGPRs = getCallingConvArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, @@ -8188,6 +8210,7 @@ default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: break; case RISCVABI::ABI_ILP32F: @@ -8219,6 +8242,8 @@ LocInfo = CCValAssign::BCvt; } + ArrayRef ArgGPRs = getCallingConvArgGPRs(ABI); + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -8231,7 +8256,7 @@ DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. 
- if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) + if (RegIdx != ArgGPRs.size() && RegIdx % 2 == 1) State.AllocateReg(ArgGPRs); } @@ -8551,6 +8576,7 @@ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const RISCVSubtarget &STI = MF.getSubtarget(); if (VA.isMemLoc()) { // f64 is passed on the stack. @@ -8566,7 +8592,7 @@ RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; - if (VA.getLocReg() == RISCV::X17) { + if (VA.getLocReg() == getCallingConvLastArgGPR(STI.getTargetABI())) { // Second half of f64 is passed on the stack. int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); @@ -8767,6 +8793,7 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); unsigned XLenInBytes = Subtarget.getXLen() / 8; + RISCVABI::ABI ABI = Subtarget.getTargetABI(); // Used with vargs to acumulate store chains. std::vector OutChains; @@ -8821,7 +8848,7 @@ } if (IsVarArg) { - ArrayRef ArgRegs = makeArrayRef(ArgGPRs); + ArrayRef ArgRegs = getCallingConvArgGPRs(ABI); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -9053,7 +9080,7 @@ Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); - if (RegLo == RISCV::X17) { + if (RegLo == getCallingConvLastArgGPR(Subtarget.getTargetABI())) { // Second half of f64 is passed on the stack. // Work out the address of the stack slot. 
if (!StackPtr.getNode()) @@ -9248,9 +9275,9 @@ Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); + assert(VA.getLocReg() == RISCV::X10 && "Unexpected reg assignment"); SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + DAG.getCopyFromReg(Chain, DL, RISCV::X11, MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -62,6 +62,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_SaveList; @@ -75,12 +77,13 @@ } BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + const RISCVSubtarget &STI = MF.getSubtarget(); const RISCVFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); // Mark any registers requested to be reserved as such for (size_t Reg = 0; Reg < getNumRegs(); Reg++) { - if (MF.getSubtarget().isRegisterReservedByUser(Reg)) + if (STI.isRegisterReservedByUser(Reg)) markSuperRegs(Reserved, Reg); } @@ -96,6 +99,11 @@ if (TFI->hasBP(MF)) markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp + // There are only 16 GPRs for RV32E. + if (STI.isRV32E()) + for (size_t Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + markSuperRegs(Reserved, Reg); + // V registers for code generation. We handle them manually. 
markSuperRegs(Reserved, RISCV::VL); markSuperRegs(Reserved, RISCV::VTYPE); @@ -301,6 +309,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_RegMask; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -74,7 +74,7 @@ } TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName); - RISCVFeatures::validate(TT, getFeatureBits()); + RISCVFeatures::validate(TT, TargetABI, getFeatureBits()); return *this; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -43,10 +43,16 @@ initializeRISCVInsertVSETVLIPass(*PR); } -static StringRef computeDataLayout(const Triple &TT) { +static StringRef computeDataLayout(const Triple &TT, + const TargetOptions &Options) { if (TT.isArch64Bit()) return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + + StringRef ABIName = Options.MCOptions.getABIName(); + if (ABIName == "ilp32e") + return "e-m:e-p:32:32-i64:64-n32-S32"; + return "e-m:e-p:32:32-i64:64-n32-S128"; } @@ -63,7 +69,7 @@ Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + : LLVMTargetMachine(T, computeDataLayout(TT, Options), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll 
@@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=LP64 ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ @@ -14,8 +16,8 @@ @var = global [32 x float] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns ; something appropriate. 
@@ -91,6 +93,76 @@ ; ILP32-NEXT: fsw ft0, %lo(var)(a0) ; ILP32-NEXT: ret ; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0: +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: flw ft0, %lo(var)(a0) +; ILP32E-NEXT: flw ft1, %lo(var+4)(a0) +; ILP32E-NEXT: flw ft2, %lo(var+8)(a0) +; ILP32E-NEXT: flw ft3, %lo(var+12)(a0) +; ILP32E-NEXT: addi a1, a0, %lo(var) +; ILP32E-NEXT: flw ft4, 16(a1) +; ILP32E-NEXT: flw ft5, 20(a1) +; ILP32E-NEXT: flw ft6, 24(a1) +; ILP32E-NEXT: flw ft7, 28(a1) +; ILP32E-NEXT: flw fa0, 32(a1) +; ILP32E-NEXT: flw fa1, 36(a1) +; ILP32E-NEXT: flw fa2, 40(a1) +; ILP32E-NEXT: flw fa3, 44(a1) +; ILP32E-NEXT: flw fa4, 48(a1) +; ILP32E-NEXT: flw fa5, 52(a1) +; ILP32E-NEXT: flw fa6, 56(a1) +; ILP32E-NEXT: flw fa7, 60(a1) +; ILP32E-NEXT: flw ft8, 64(a1) +; ILP32E-NEXT: flw ft9, 68(a1) +; ILP32E-NEXT: flw ft10, 72(a1) +; ILP32E-NEXT: flw ft11, 76(a1) +; ILP32E-NEXT: flw fs0, 80(a1) +; ILP32E-NEXT: flw fs1, 84(a1) +; ILP32E-NEXT: flw fs2, 88(a1) +; ILP32E-NEXT: flw fs3, 92(a1) +; ILP32E-NEXT: flw fs4, 96(a1) +; ILP32E-NEXT: flw fs5, 100(a1) +; ILP32E-NEXT: flw fs6, 104(a1) +; ILP32E-NEXT: flw fs7, 108(a1) +; ILP32E-NEXT: flw fs8, 124(a1) +; ILP32E-NEXT: flw fs9, 120(a1) +; ILP32E-NEXT: flw fs10, 116(a1) +; ILP32E-NEXT: flw fs11, 112(a1) +; ILP32E-NEXT: fsw fs8, 124(a1) +; ILP32E-NEXT: fsw fs9, 120(a1) +; ILP32E-NEXT: fsw fs10, 116(a1) +; ILP32E-NEXT: fsw fs11, 112(a1) +; ILP32E-NEXT: fsw fs7, 108(a1) +; ILP32E-NEXT: fsw fs6, 104(a1) +; ILP32E-NEXT: fsw fs5, 100(a1) +; ILP32E-NEXT: fsw fs4, 96(a1) +; ILP32E-NEXT: fsw fs3, 92(a1) +; ILP32E-NEXT: fsw fs2, 88(a1) +; ILP32E-NEXT: fsw fs1, 84(a1) +; ILP32E-NEXT: fsw fs0, 80(a1) +; ILP32E-NEXT: fsw ft11, 76(a1) +; ILP32E-NEXT: fsw ft10, 72(a1) +; ILP32E-NEXT: fsw ft9, 68(a1) +; ILP32E-NEXT: fsw ft8, 64(a1) +; ILP32E-NEXT: fsw fa7, 60(a1) +; ILP32E-NEXT: fsw fa6, 56(a1) +; ILP32E-NEXT: fsw fa5, 52(a1) +; ILP32E-NEXT: fsw fa4, 48(a1) +; ILP32E-NEXT: fsw fa3, 44(a1) +; ILP32E-NEXT: fsw fa2, 40(a1) +; ILP32E-NEXT: fsw fa1, 
36(a1) +; ILP32E-NEXT: fsw fa0, 32(a1) +; ILP32E-NEXT: fsw ft7, 28(a1) +; ILP32E-NEXT: fsw ft6, 24(a1) +; ILP32E-NEXT: fsw ft5, 20(a1) +; ILP32E-NEXT: fsw ft4, 16(a1) +; ILP32E-NEXT: fsw ft3, %lo(var+12)(a0) +; ILP32E-NEXT: fsw ft2, %lo(var+8)(a0) +; ILP32E-NEXT: fsw ft1, %lo(var+4)(a0) +; ILP32E-NEXT: fsw ft0, %lo(var)(a0) +; ILP32E-NEXT: ret +; ; LP64-LABEL: callee: ; LP64: # %bb.0: ; LP64-NEXT: lui a0, %hi(var) @@ -700,6 +772,149 @@ ; ILP32-NEXT: addi sp, sp, 144 ; ILP32-NEXT: ret ; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NEXT: addi sp, sp, -140 +; ILP32E-NEXT: sw ra, 136(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 132(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s1, 128(sp) # 4-byte Folded Spill +; ILP32E-NEXT: lui s0, %hi(var) +; ILP32E-NEXT: flw ft0, %lo(var)(s0) +; ILP32E-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+4)(s0) +; ILP32E-NEXT: fsw ft0, 120(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+8)(s0) +; ILP32E-NEXT: fsw ft0, 116(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+12)(s0) +; ILP32E-NEXT: fsw ft0, 112(sp) # 4-byte Folded Spill +; ILP32E-NEXT: addi s1, s0, %lo(var) +; ILP32E-NEXT: flw ft0, 16(s1) +; ILP32E-NEXT: fsw ft0, 108(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 20(s1) +; ILP32E-NEXT: fsw ft0, 104(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 24(s1) +; ILP32E-NEXT: fsw ft0, 100(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 28(s1) +; ILP32E-NEXT: fsw ft0, 96(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 32(s1) +; ILP32E-NEXT: fsw ft0, 92(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 36(s1) +; ILP32E-NEXT: fsw ft0, 88(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 40(s1) +; ILP32E-NEXT: fsw ft0, 84(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 44(s1) +; ILP32E-NEXT: fsw ft0, 80(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 48(s1) +; ILP32E-NEXT: fsw ft0, 76(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 52(s1) +; 
ILP32E-NEXT: fsw ft0, 72(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 56(s1) +; ILP32E-NEXT: fsw ft0, 68(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 60(s1) +; ILP32E-NEXT: fsw ft0, 64(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 64(s1) +; ILP32E-NEXT: fsw ft0, 60(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 68(s1) +; ILP32E-NEXT: fsw ft0, 56(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 72(s1) +; ILP32E-NEXT: fsw ft0, 52(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 76(s1) +; ILP32E-NEXT: fsw ft0, 48(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 80(s1) +; ILP32E-NEXT: fsw ft0, 44(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 84(s1) +; ILP32E-NEXT: fsw ft0, 40(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 88(s1) +; ILP32E-NEXT: fsw ft0, 36(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 92(s1) +; ILP32E-NEXT: fsw ft0, 32(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 96(s1) +; ILP32E-NEXT: fsw ft0, 28(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 100(s1) +; ILP32E-NEXT: fsw ft0, 24(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 104(s1) +; ILP32E-NEXT: fsw ft0, 20(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 108(s1) +; ILP32E-NEXT: fsw ft0, 16(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 112(s1) +; ILP32E-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 116(s1) +; ILP32E-NEXT: fsw ft0, 8(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 120(s1) +; ILP32E-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 124(s1) +; ILP32E-NEXT: fsw ft0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: call callee@plt +; ILP32E-NEXT: flw ft0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 124(s1) +; ILP32E-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 120(s1) +; ILP32E-NEXT: flw ft0, 8(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 116(s1) +; ILP32E-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 112(s1) +; ILP32E-NEXT: 
flw ft0, 16(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 108(s1) +; ILP32E-NEXT: flw ft0, 20(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 104(s1) +; ILP32E-NEXT: flw ft0, 24(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 100(s1) +; ILP32E-NEXT: flw ft0, 28(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 96(s1) +; ILP32E-NEXT: flw ft0, 32(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 92(s1) +; ILP32E-NEXT: flw ft0, 36(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 88(s1) +; ILP32E-NEXT: flw ft0, 40(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 84(s1) +; ILP32E-NEXT: flw ft0, 44(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 80(s1) +; ILP32E-NEXT: flw ft0, 48(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 76(s1) +; ILP32E-NEXT: flw ft0, 52(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 72(s1) +; ILP32E-NEXT: flw ft0, 56(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 68(s1) +; ILP32E-NEXT: flw ft0, 60(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 64(s1) +; ILP32E-NEXT: flw ft0, 64(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 60(s1) +; ILP32E-NEXT: flw ft0, 68(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 56(s1) +; ILP32E-NEXT: flw ft0, 72(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 52(s1) +; ILP32E-NEXT: flw ft0, 76(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 48(s1) +; ILP32E-NEXT: flw ft0, 80(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 44(s1) +; ILP32E-NEXT: flw ft0, 84(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 40(s1) +; ILP32E-NEXT: flw ft0, 88(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 36(s1) +; ILP32E-NEXT: flw ft0, 92(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 32(s1) +; ILP32E-NEXT: flw ft0, 96(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 28(s1) +; ILP32E-NEXT: flw ft0, 100(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 24(s1) +; ILP32E-NEXT: flw ft0, 104(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 20(s1) +; 
ILP32E-NEXT: flw ft0, 108(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 16(s1) +; ILP32E-NEXT: flw ft0, 112(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+12)(s0) +; ILP32E-NEXT: flw ft0, 116(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+8)(s0) +; ILP32E-NEXT: flw ft0, 120(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+4)(s0) +; ILP32E-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var)(s0) +; ILP32E-NEXT: lw ra, 136(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 132(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s1, 128(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 140 +; ILP32E-NEXT: ret +; ; LP64-LABEL: caller: ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -160 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \ @@ -136,6 +138,96 @@ ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: callee: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -36 +; RV32I-ILP32E-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a7, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: 
lw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(a5) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(a5) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw t4, 24(a5) +; RV32I-ILP32E-NEXT: lw t5, 28(a5) +; RV32I-ILP32E-NEXT: lw t6, 32(a5) +; RV32I-ILP32E-NEXT: lw s2, 36(a5) +; RV32I-ILP32E-NEXT: lw s3, 40(a5) +; RV32I-ILP32E-NEXT: lw s4, 44(a5) +; RV32I-ILP32E-NEXT: lw s5, 48(a5) +; RV32I-ILP32E-NEXT: lw s6, 52(a5) +; RV32I-ILP32E-NEXT: lw s7, 56(a5) +; RV32I-ILP32E-NEXT: lw s8, 60(a5) +; RV32I-ILP32E-NEXT: lw s9, 64(a5) +; RV32I-ILP32E-NEXT: lw s10, 68(a5) +; RV32I-ILP32E-NEXT: lw s11, 72(a5) +; RV32I-ILP32E-NEXT: lw ra, 76(a5) +; RV32I-ILP32E-NEXT: lw t3, 80(a5) +; RV32I-ILP32E-NEXT: lw t2, 84(a5) +; RV32I-ILP32E-NEXT: lw t1, 88(a5) +; RV32I-ILP32E-NEXT: lw s0, 92(a5) +; RV32I-ILP32E-NEXT: lw s1, 96(a5) +; RV32I-ILP32E-NEXT: lw t0, 100(a5) +; RV32I-ILP32E-NEXT: lw a6, 104(a5) +; RV32I-ILP32E-NEXT: lw a4, 108(a5) +; RV32I-ILP32E-NEXT: lw a0, 124(a5) +; RV32I-ILP32E-NEXT: lw a1, 120(a5) +; RV32I-ILP32E-NEXT: lw a2, 116(a5) +; RV32I-ILP32E-NEXT: lw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a0, 124(a5) +; RV32I-ILP32E-NEXT: sw a1, 120(a5) +; RV32I-ILP32E-NEXT: sw a2, 116(a5) +; RV32I-ILP32E-NEXT: sw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a4, 108(a5) +; RV32I-ILP32E-NEXT: sw a6, 104(a5) +; RV32I-ILP32E-NEXT: sw t0, 100(a5) +; RV32I-ILP32E-NEXT: sw s1, 96(a5) +; RV32I-ILP32E-NEXT: sw s0, 92(a5) +; RV32I-ILP32E-NEXT: sw t1, 88(a5) +; RV32I-ILP32E-NEXT: sw t2, 84(a5) +; RV32I-ILP32E-NEXT: sw t3, 80(a5) +; RV32I-ILP32E-NEXT: sw ra, 76(a5) +; RV32I-ILP32E-NEXT: sw s11, 72(a5) +; RV32I-ILP32E-NEXT: sw 
s10, 68(a5) +; RV32I-ILP32E-NEXT: sw s9, 64(a5) +; RV32I-ILP32E-NEXT: sw s8, 60(a5) +; RV32I-ILP32E-NEXT: sw s7, 56(a5) +; RV32I-ILP32E-NEXT: sw s6, 52(a5) +; RV32I-ILP32E-NEXT: sw s5, 48(a5) +; RV32I-ILP32E-NEXT: sw s4, 44(a5) +; RV32I-ILP32E-NEXT: sw s3, 40(a5) +; RV32I-ILP32E-NEXT: sw s2, 36(a5) +; RV32I-ILP32E-NEXT: sw t6, 32(a5) +; RV32I-ILP32E-NEXT: sw t5, 28(a5) +; RV32I-ILP32E-NEXT: sw t4, 24(a5) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(a5) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(a5) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 36 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -80 @@ -623,6 +715,148 @@ ; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -136 +; RV32I-ILP32E-NEXT: sw ra, 132(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 128(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+4)(a0) +; RV32I-ILP32E-NEXT: sw a1, 116(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+8)(a0) +; 
RV32I-ILP32E-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+12)(a0) +; RV32I-ILP32E-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s1, a0, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(s1) +; RV32I-ILP32E-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(s1) +; RV32I-ILP32E-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 24(s1) +; RV32I-ILP32E-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 28(s1) +; RV32I-ILP32E-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 32(s1) +; RV32I-ILP32E-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 36(s1) +; RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 40(s1) +; RV32I-ILP32E-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 44(s1) +; RV32I-ILP32E-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 48(s1) +; RV32I-ILP32E-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 52(s1) +; RV32I-ILP32E-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 56(s1) +; RV32I-ILP32E-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 60(s1) +; RV32I-ILP32E-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 64(s1) +; RV32I-ILP32E-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 68(s1) +; RV32I-ILP32E-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 72(s1) +; RV32I-ILP32E-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 76(s1) +; RV32I-ILP32E-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 80(s1) +; RV32I-ILP32E-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 84(s1) +; RV32I-ILP32E-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 88(s1) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; 
RV32I-ILP32E-NEXT: lw a0, 92(s1) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 96(s1) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 100(s1) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 104(s1) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 108(s1) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 112(s1) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 116(s1) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 120(s1) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw s0, 124(s1) +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: sw s0, 124(s1) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 120(s1) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 116(s1) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 112(s1) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 108(s1) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 104(s1) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 100(s1) +; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 96(s1) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 92(s1) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 88(s1) +; RV32I-ILP32E-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 84(s1) +; RV32I-ILP32E-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 80(s1) +; RV32I-ILP32E-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 
76(s1) +; RV32I-ILP32E-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 72(s1) +; RV32I-ILP32E-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 68(s1) +; RV32I-ILP32E-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 64(s1) +; RV32I-ILP32E-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 60(s1) +; RV32I-ILP32E-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 56(s1) +; RV32I-ILP32E-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 52(s1) +; RV32I-ILP32E-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 48(s1) +; RV32I-ILP32E-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 44(s1) +; RV32I-ILP32E-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 40(s1) +; RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 36(s1) +; RV32I-ILP32E-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 32(s1) +; RV32I-ILP32E-NEXT: lw a0, 92(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 28(s1) +; RV32I-ILP32E-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 24(s1) +; RV32I-ILP32E-NEXT: lw a0, 100(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(s1) +; RV32I-ILP32E-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(s1) +; RV32I-ILP32E-NEXT: lui a1, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, 108(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a1) +; RV32I-ILP32E-NEXT: lw a0, 112(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a1) +; RV32I-ILP32E-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a1) +; RV32I-ILP32E-NEXT: lw a0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a1) +; RV32I-ILP32E-NEXT: lw ra, 132(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 128(sp) # 4-byte 
Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 136 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -144 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,1601 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Any tests that would have identical output for some combination of the ilp32* +; ABIs belong in calling-conv-*-common.ll. This file contains tests that will +; have different output across those ABIs, i.e. where some arguments would be +; passed according to the floating point ABI, or where the stack is aligned to +; a different boundary. + +; 'g' should be passed on the stack: ilp32e has only six argument GPRs (a0-a5). 
+define i32 @pass_by_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { +; ILP32E-FPELIM-LABEL: pass_by_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 0(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: add a0, a2, a0 +; ILP32E-FPELIM-NEXT: add a0, a3, a0 +; ILP32E-FPELIM-NEXT: add a0, a4, a0 +; ILP32E-FPELIM-NEXT: add a0, a5, a0 +; ILP32E-FPELIM-NEXT: add a0, a6, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: pass_by_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a6, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: add a0, a2, a0 +; ILP32E-WITHFP-NEXT: add a0, a3, a0 +; ILP32E-WITHFP-NEXT: add a0, a4, a0 +; ILP32E-WITHFP-NEXT: add a0, a5, a0 +; ILP32E-WITHFP-NEXT: add a0, a6, a0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = add i32 %a, %b + %2 = add i32 %c, %1 + %3 = add i32 %d, %2 + %4 = add i32 %e, %3 + %5 = add i32 %f, %4 + %6 = add i32 %g, %5 + ret i32 %6 +} + +define i32 @callee_float_in_regs(i32 %a, float %b) { +; ILP32E-FPELIM-LABEL: callee_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: call __fixsfsi@plt +; 
ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: call __fixsfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() { +; ILP32E-FPELIM-LABEL: caller_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a1, 262144 +; ILP32E-FPELIM-NEXT: call callee_float_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; 
ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a1, 262144 +; ILP32E-WITHFP-NEXT: call callee_float_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) { +; ILP32E-FPELIM-LABEL: callee_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 8(s0) +; ILP32E-FPELIM-NEXT: lw a1, 0(s0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 
8(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i32 @caller_float_on_stack() { +; ILP32E-FPELIM-LABEL: caller_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lui a0, 264704 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a2, 2 +; ILP32E-FPELIM-NEXT: li a4, 3 +; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: li a3, 0 +; ILP32E-FPELIM-NEXT: li a5, 0 +; ILP32E-FPELIM-NEXT: call callee_float_on_stack@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: 
.cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lui a0, 264704 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a2, 2 +; ILP32E-WITHFP-NEXT: li a4, 3 +; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: li a3, 0 +; ILP32E-WITHFP-NEXT: li a5, 0 +; ILP32E-WITHFP-NEXT: call callee_float_on_stack@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 260096 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 260096 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call 
callee_tiny_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_double_in_regs(i32 %a, double %b) { +; ILP32E-FPELIM-LABEL: callee_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: mv a1, a2 +; ILP32E-FPELIM-NEXT: call __fixdfsi@plt +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: mv a1, a2 +; ILP32E-WITHFP-NEXT: call __fixdfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret + %b_fptosi = fptosi double %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_regs() { +; ILP32E-FPELIM-LABEL: caller_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; 
ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a2, 262144 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call callee_double_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call callee_double_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_double_in_regs(i32 1, double 2.0) + ret i32 %1 +} + +; Check 2*xlen values are aligned appropriately when passed on the stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: callee_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 
+; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-NEXT: lw a1, 28(s0) +; ILP32E-FPELIM-NEXT: lw a2, 4(s0) +; ILP32E-FPELIM-NEXT: lw a3, 8(s0) +; ILP32E-FPELIM-NEXT: lw a4, 16(s0) +; ILP32E-FPELIM-NEXT: lw a5, 24(s0) +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a0, a0, a3 +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-NEXT: lw a1, 28(s0) +; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a4, 16(s0) +; ILP32E-WITHFP-NEXT: lw a5, 24(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a0, a0, a3 +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 
%3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: caller_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -80 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 80 +; ILP32E-FPELIM-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 80 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: li a0, 18 +; ILP32E-FPELIM-NEXT: sw a0, 32(sp) +; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: li a0, 16 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: li a0, 15 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: li a0, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 60(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 56(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 52(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: 
li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 48 +; ILP32E-FPELIM-NEXT: li a3, 12 +; ILP32E-FPELIM-NEXT: li a4, 13 +; ILP32E-FPELIM-NEXT: sw a6, 48(sp) +; ILP32E-FPELIM-NEXT: call callee_aligned_stack@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -80 +; ILP32E-FPELIM-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 80 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -80 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 80 +; ILP32E-WITHFP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 80 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 18 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 60(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 56(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 52(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; 
ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 48 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 13 +; ILP32E-WITHFP-NEXT: sw a6, 48(sp) +; ILP32E-WITHFP-NEXT: call callee_aligned_stack@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -80 +; ILP32E-WITHFP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 80 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +define double @callee_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 261888 +; ILP32E-FPELIM-NEXT: li a0, 0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 261888 +; ILP32E-WITHFP-NEXT: li a0, 0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + ret double 1.0 +} + +define i64 @caller_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) 
# 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call double @callee_small_scalar_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +; Check that on RV32, i64 is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_i64_in_regs(i32 %a, i64 %b) { +; ILP32E-FPELIM-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %b_trunc = trunc i64 %b to i32 + %1 = add i32 %a, %b_trunc + ret i32 %1 +} + +define i32 @caller_i64_in_regs() { +; ILP32E-FPELIM-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 0 +; ILP32E-FPELIM-NEXT: call callee_i64_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; 
ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call callee_i64_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_i64_in_regs(i32 1, i64 2) + ret i32 %1 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) { +; ILP32E-FPELIM-LABEL: callee_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a6, 16(s0) +; ILP32E-FPELIM-NEXT: lw a7, 0(s0) +; ILP32E-FPELIM-NEXT: lw t0, 8(s0) +; ILP32E-FPELIM-NEXT: lw t1, 12(s0) +; ILP32E-FPELIM-NEXT: andi t2, a0, 255 +; ILP32E-FPELIM-NEXT: lui a0, 16 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1 +; ILP32E-FPELIM-NEXT: and a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, t2, a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: 
.cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a6, 16(s0) +; ILP32E-WITHFP-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-NEXT: lw t0, 8(s0) +; ILP32E-WITHFP-NEXT: lw t1, 12(s0) +; ILP32E-WITHFP-NEXT: andi t2, a0, 255 +; ILP32E-WITHFP-NEXT: lui a0, 16 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1 +; ILP32E-WITHFP-NEXT: and a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, t2, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret i32 %8 +} + +define i32 @caller_many_scalars() { +; ILP32E-FPELIM-LABEL: caller_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; 
ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: li a0, 8 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: li a4, 6 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a5, 5 +; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: li a4, 0 +; ILP32E-FPELIM-NEXT: call callee_many_scalars@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: li a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a4, 6 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a5, 5 +; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: li a4, 0 +; ILP32E-WITHFP-NEXT: call callee_many_scalars@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_many_scalars(i8 1, i16 2, 
i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) { +; ILP32E-FPELIM-LABEL: callee_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 0(a1) +; ILP32E-FPELIM-NEXT: lw a7, 0(a0) +; ILP32E-FPELIM-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a2, 12(a0) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a6, 0(a1) +; ILP32E-WITHFP-NEXT: lw a7, 0(a0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a2, 12(a0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded 
Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() { +; ILP32E-FPELIM-LABEL: caller_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: li a2, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: mv a1, sp +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; 
ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-NEXT: li a2, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a1, sp +; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in a register + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) { +; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a6, 0(a0) +; ILP32E-FPELIM-NEXT: lw a7, 0(a1) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a2, 12(a1) +; ILP32E-FPELIM-NEXT: lw a3, 4(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: 
addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a6, 0(a0) +; ILP32E-WITHFP-NEXT: lw a7, 0(a1) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a2, 12(a1) +; ILP32E-WITHFP-NEXT: lw a3, 4(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() { +; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 9 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; 
ILP32E-FPELIM-NEXT: addi a0, sp, 40 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: li a6, 8 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a4, 5 +; ILP32E-FPELIM-NEXT: li a5, 6 +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 9 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 40 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; 
ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: li a6, 8 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a4, 5 +; ILP32E-WITHFP-NEXT: li a5, 6 +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) { +; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a2, a1 +; ILP32E-FPELIM-NEXT: mv a1, a0 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call __floatditf@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, 
-24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a2, a1 +; ILP32E-WITHFP-NEXT: mv a1, a0 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call __floatditf@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) { +; ILP32E-FPELIM-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] 
%a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() { +; ILP32E-FPELIM-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a) { +; ILP32E-FPELIM-LABEL: callee_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; 
ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() { +; ILP32E-FPELIM-LABEL: caller_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -36 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 36 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-NEXT: sw ra, 36(sp) # 4-byte 
Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-NEXT: sw a2, -32(s0) +; ILP32E-WITHFP-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-NEXT: call callee_large_struct@plt +; ILP32E-WITHFP-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %ls) + ret i32 %2 +} + +; Check return of 2x xlen structs + +define %struct.small @callee_small_struct_ret() { +; ILP32E-FPELIM-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte 
Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_struct_ret@plt +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_struct_ret@plt +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen 
scalars + +define fp128 @callee_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 524272 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 524272 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_scalar_ret@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; 
ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias sret(%struct.large) %agg.result) { +; ILP32E-FPELIM-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a1, 1 +; ILP32E-FPELIM-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-NEXT: li a1, 3 +; ILP32E-FPELIM-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a1, 1 +; ILP32E-WITHFP-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; 
ILP32E-WITHFP-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-NEXT: li a1, 3 +; ILP32E-WITHFP-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct_ret@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 
4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_struct_ret@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret(%struct.large) %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + %6 = add i32 %3, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32IF +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E -; Exercises the ILP32 calling convention code in the case that f32 is a legal +; Exercises the ILP32/ILP32E calling convention code in the case that f32 is a legal ; type. As well as testing that lowering is correct, these tests also aim to ; check that floating point load/store or integer load/store is chosen ; optimally when floats are passed on the stack. 
@@ -12,6 +14,20 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: lw a0, 4(sp) ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 8 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: lw a0, 12(s0) +; RV32IF-ILP32E-NEXT: addi sp, s0, -8 +; RV32IF-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 8 +; RV32IF-ILP32E-NEXT: ret ret float %f } @@ -23,6 +39,23 @@ ; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 8 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: flw ft0, 12(s0) +; RV32IF-ILP32E-NEXT: flw ft1, 8(s0) +; RV32IF-ILP32E-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-ILP32E-NEXT: fmv.x.w a0, ft0 +; RV32IF-ILP32E-NEXT: addi sp, s0, -8 +; RV32IF-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 8 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %e, %f ret float %1 } @@ -47,6 +80,32 @@ ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -24 +; RV32IF-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 24 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw a0, 12(sp) +; RV32IF-ILP32E-NEXT: lui a0, 
264704 +; RV32IF-ILP32E-NEXT: sw a0, 8(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a1, 4 +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: addi sp, s0, -24 +; RV32IF-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 24 +; RV32IF-ILP32E-NEXT: ret %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a) ret float %1 } @@ -74,6 +133,35 @@ ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -24 +; RV32IF-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 24 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: fmv.w.x ft0, a1 +; RV32IF-ILP32E-NEXT: fmv.w.x ft1, a0 +; RV32IF-ILP32E-NEXT: fadd.s ft2, ft1, ft0 +; RV32IF-ILP32E-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a0, 4 +; RV32IF-ILP32E-NEXT: sw a0, 0(sp) +; RV32IF-ILP32E-NEXT: fsw ft0, 12(sp) +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: fsw ft2, 8(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: addi sp, s0, -24 +; RV32IF-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 24 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %a, %b %2 = fsub 
float %b, %a %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2) diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll --- a/llvm/test/CodeGen/RISCV/rv32e.ll +++ b/llvm/test/CodeGen/RISCV/rv32e.ll @@ -1,7 +1,11 @@ -; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s -; CHECK: LLVM ERROR: Codegen not yet implemented for RV32E +; TODO: Add more tests. define void @nothing() nounwind { +; CHECK-LABEL: nothing: +; CHECK: # %bb.0: +; CHECK-NEXT: ret ret void } diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I @@ -34,6 +36,33 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: 
.cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv s1, sp +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: mv a1, s1 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -1,11 +1,102 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I declare void @callee(i8*) +define void @caller16() { +; RV32I-LABEL: caller16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 16 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, 
-4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 16 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign16() "no-realign-stack" { +; RV32I-LABEL: caller_no_realign16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller_no_realign16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller_no_realign16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; 
RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + define void @caller32() { ; RV32I-LABEL: caller32: ; RV32I: # %bb.0: @@ -26,6 +117,25 @@ ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32I-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -32 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -32 +; RV32I-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 32 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -62,6 +172,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -98,6 +220,25 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: 
sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -134,6 +275,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -170,6 +323,25 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -128 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 
120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 @@ -206,6 +378,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -242,6 +426,25 @@ ; RV32I-NEXT: addi sp, sp, 256 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 256 +; RV32I-ILP32E-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -256 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -256 +; RV32I-ILP32E-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 256 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -256 @@ -278,6 +481,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: 
.cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -314,6 +529,25 @@ ; RV32I-NEXT: addi sp, sp, 1024 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 1024 +; RV32I-ILP32E-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -512 +; RV32I-ILP32E-NEXT: addi a0, sp, 512 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -1024 +; RV32I-ILP32E-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 1024 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -1024 @@ -350,6 +584,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -388,6 +634,27 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, 
-2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: andi sp, sp, -1024 +; RV32I-ILP32E-NEXT: addi a0, sp, 1024 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -426,6 +693,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -471,6 +750,34 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui a0, 1 +; 
RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: andi sp, sp, -2048 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -516,6 +823,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -561,6 +880,34 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: srli a0, sp, 12 +; RV32I-ILP32E-NEXT: slli sp, 
a0, 12 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -606,6 +953,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll --- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll +++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \ @@ -33,8 +35,3 @@ ; CHECK-IMP-NEXT: ret ret void } - -; RUN: not --crash llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s - -; 
CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @abort() + +define i32 @caller(i32 %a) { +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -8 +; ILP32E-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -4 +; ILP32E-NEXT: .cfi_offset s0, -8 +; ILP32E-NEXT: mv s0, a0 +; ILP32E-NEXT: li a0, 1 +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: li a2, 0 +; ILP32E-NEXT: call va_double@plt +; ILP32E-NEXT: mv a0, s0 +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 8 +; ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; 
ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call va_double@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +entry: + call void (i32, ...) @va_double(i32 1, double 2.000000e+00) + ret i32 %a +} + +define void @va_double(i32 %n, ...) { +; ILP32E-LABEL: va_double: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -32 +; ILP32E-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -28 +; ILP32E-NEXT: sw a5, 28(sp) +; ILP32E-NEXT: sw a4, 24(sp) +; ILP32E-NEXT: sw a3, 20(sp) +; ILP32E-NEXT: sw a2, 16(sp) +; ILP32E-NEXT: sw a1, 12(sp) +; ILP32E-NEXT: addi a0, sp, 19 +; ILP32E-NEXT: andi a1, a0, -8 +; ILP32E-NEXT: addi a0, a1, 8 +; ILP32E-NEXT: sw a0, 0(sp) +; ILP32E-NEXT: lw a0, 0(a1) +; ILP32E-NEXT: ori a1, a1, 4 +; ILP32E-NEXT: lw a1, 0(a1) +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: li a2, 0 +; ILP32E-NEXT: call __eqdf2@plt +; ILP32E-NEXT: bnez a0, .LBB1_2 +; ILP32E-NEXT: # %bb.1: # %if.end +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 32 +; ILP32E-NEXT: ret +; ILP32E-NEXT: .LBB1_2: # %if.then +; ILP32E-NEXT: call abort@plt +; +; ILP32E-WITHFP-LABEL: va_double: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw 
a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: ori a1, a1, 4 +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call __eqdf2@plt +; ILP32E-WITHFP-NEXT: bnez a0, .LBB1_2 +; ILP32E-WITHFP-NEXT: # %bb.1: # %if.end +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ILP32E-WITHFP-NEXT: .LBB1_2: # %if.then +; ILP32E-WITHFP-NEXT: call abort@plt +entry: + %args = alloca i8*, align 4 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %argp.cur = load i8*, i8** %args, align 4 + %0 = ptrtoint i8* %argp.cur to i32 + %1 = add i32 %0, 7 + %2 = and i32 %1, -8 + %argp.cur.aligned = inttoptr i32 %2 to i8* + %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8 + store i8* %argp.next, i8** %args, align 4 + %3 = bitcast i8* %argp.cur.aligned to double* + %4 = load double, double* %3, align 8 + %cmp = fcmp une double %4, 2.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + call void @abort() + unreachable + +if.end: + %args2 = bitcast i8** %args to i8* + call void @llvm.va_end(i8* %args2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -11,6 +11,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all 
-verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \ @@ -97,6 +101,44 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -204,6 +246,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; 
RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -341,6 +416,62 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg_alloca: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: mv s1, a1 +; RV32I-ILP32E-NEXT: sw a5, 20(s0) +; RV32I-ILP32E-NEXT: sw a4, 16(s0) +; RV32I-ILP32E-NEXT: sw a3, 12(s0) +; RV32I-ILP32E-NEXT: sw a2, 8(s0) +; 
RV32I-ILP32E-NEXT: sw a1, 4(s0) +; RV32I-ILP32E-NEXT: addi a0, s0, 8 +; RV32I-ILP32E-NEXT: sw a0, -16(s0) +; RV32I-ILP32E-NEXT: addi a0, a1, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: call notdead@plt +; RV32I-ILP32E-NEXT: mv a0, s1 +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg_alloca: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a1, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: sub a0, sp, a0 +; ILP32E-WITHFP-NEXT: mv sp, a0 +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -455,6 +586,33 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; 
RV32I-ILP32E-LABEL: va1_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: li a4, 2 +; RV32I-ILP32E-NEXT: li a2, 0 +; RV32I-ILP32E-NEXT: call va1@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: li a4, 2 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call va1@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -554,6 +712,47 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a1, a0, -8 +; RV32I-ILP32E-NEXT: addi a0, sp, 23 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a1) +; RV32I-ILP32E-NEXT: ori a1, a1, 4 +; RV32I-ILP32E-NEXT: lw a1, 0(a1) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; 
ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, s0, 19 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: ori a1, a1, 4 +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -696,6 +895,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a1, a0, -8 +; RV32I-ILP32E-NEXT: ori a2, a1, 4 +; RV32I-ILP32E-NEXT: sw a2, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a1) +; RV32I-ILP32E-NEXT: addi a1, a1, 8 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a2) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; 
ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: ori a2, a1, 4 +; ILP32E-WITHFP-NEXT: sw a2, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: addi a1, a1, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a2) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -778,6 +1020,31 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: li a2, 0 +; RV32I-ILP32E-NEXT: call va2@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call va2@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -881,6 +1148,51 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; 
+; RV32I-ILP32E-LABEL: va3: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: addi a3, sp, 23 +; RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a0) +; RV32I-ILP32E-NEXT: ori a0, a0, 4 +; RV32I-ILP32E-NEXT: lw a4, 0(a0) +; RV32I-ILP32E-NEXT: add a0, a1, a3 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a4 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, s0, 19 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: ori a0, a0, 4 +; ILP32E-WITHFP-NEXT: lw a4, 0(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a4 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1030,6 +1342,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; 
RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: ori a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a4, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 8 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a3) +; RV32I-ILP32E-NEXT: add a0, a1, a4 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a3 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: ori a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a4, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a3) +; ILP32E-WITHFP-NEXT: add a0, a1, a4 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1120,6 +1479,37 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw 
ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: li a0, 2 +; RV32I-ILP32E-NEXT: li a1, 1111 +; RV32I-ILP32E-NEXT: lui a5, 262144 +; RV32I-ILP32E-NEXT: li a2, 0 +; RV32I-ILP32E-NEXT: li a4, 0 +; RV32I-ILP32E-NEXT: call va3@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: li a0, 2 +; ILP32E-WITHFP-NEXT: li a1, 1111 +; ILP32E-WITHFP-NEXT: lui a5, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: li a4, 0 +; ILP32E-WITHFP-NEXT: call va3@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -1281,6 +1671,87 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va4_va_copy: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: mv s0, a1 +; RV32I-ILP32E-NEXT: sw a5, 36(sp) +; RV32I-ILP32E-NEXT: sw a4, 32(sp) +; RV32I-ILP32E-NEXT: sw a3, 28(sp) +; RV32I-ILP32E-NEXT: sw a2, 24(sp) +; RV32I-ILP32E-NEXT: sw a1, 20(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 24 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: call notdead@plt +; RV32I-ILP32E-NEXT: lw a0, 4(sp) +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a1, a0, 4 +; 
RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a2, a0, 4 +; RV32I-ILP32E-NEXT: sw a2, 4(sp) +; RV32I-ILP32E-NEXT: lw a2, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 4(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a0) +; RV32I-ILP32E-NEXT: add a1, a1, s0 +; RV32I-ILP32E-NEXT: add a1, a1, a2 +; RV32I-ILP32E-NEXT: add a0, a1, a0 +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va4_va_copy: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -44 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: sw a0, -20(s0) +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: lw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -16(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a2, a0, 4 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -16(s0) +; 
ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a1, a1, s1 +; ILP32E-WITHFP-NEXT: add a1, a1, a2 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 44 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -1524,6 +1995,106 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va5_aligned_stack_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -80 +; RV32I-ILP32E-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s0, sp, 80 +; RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: li a0, 17 +; RV32I-ILP32E-NEXT: sw a0, 32(sp) +; RV32I-ILP32E-NEXT: li a0, 16 +; RV32I-ILP32E-NEXT: sw a0, 28(sp) +; RV32I-ILP32E-NEXT: li a0, 15 +; RV32I-ILP32E-NEXT: sw a0, 24(sp) +; RV32I-ILP32E-NEXT: lui a0, 262236 +; RV32I-ILP32E-NEXT: addi a0, a0, 655 +; RV32I-ILP32E-NEXT: sw a0, 20(sp) +; RV32I-ILP32E-NEXT: lui a0, 377487 +; RV32I-ILP32E-NEXT: addi a0, a0, 1475 +; RV32I-ILP32E-NEXT: sw a0, 16(sp) +; RV32I-ILP32E-NEXT: li a0, 14 +; RV32I-ILP32E-NEXT: sw a0, 8(sp) +; RV32I-ILP32E-NEXT: li a0, 4 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: lui a0, 688509 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lui a0, 262153 +; RV32I-ILP32E-NEXT: addi a0, a0, 491 +; RV32I-ILP32E-NEXT: sw a0, 60(sp) +; RV32I-ILP32E-NEXT: lui a0, 545260 +; RV32I-ILP32E-NEXT: addi a0, a0, -1967 +; RV32I-ILP32E-NEXT: sw a0, 56(sp) +; RV32I-ILP32E-NEXT: lui a0, 964690 +; RV32I-ILP32E-NEXT: addi a0, a0, -328 +; RV32I-ILP32E-NEXT: sw a0, 52(sp) +; RV32I-ILP32E-NEXT: 
lui a0, 335544 +; RV32I-ILP32E-NEXT: addi a5, a0, 1311 +; RV32I-ILP32E-NEXT: li a0, 1 +; RV32I-ILP32E-NEXT: li a1, 11 +; RV32I-ILP32E-NEXT: addi a2, sp, 48 +; RV32I-ILP32E-NEXT: li a3, 12 +; RV32I-ILP32E-NEXT: li a4, 13 +; RV32I-ILP32E-NEXT: sw a5, 48(sp) +; RV32I-ILP32E-NEXT: call va5_aligned_stack_callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -80 +; RV32I-ILP32E-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 80 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -80 +; ILP32E-WITHFP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 80 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a0, a0, -2048 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 60(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 56(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 52(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a5, a0, 1311 +; ILP32E-WITHFP-NEXT: li 
a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 48 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 13 +; ILP32E-WITHFP-NEXT: sw a5, 48(sp) +; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -80 +; ILP32E-WITHFP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 80 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 @@ -1688,6 +2259,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va6_no_fixed_args: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 8 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va6_no_fixed_args: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; 
LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -1857,6 +2461,74 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add sp, sp, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va_large_stack: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: lui a0, 24414 +; RV32I-ILP32E-NEXT: addi a0, a0, 288 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 100000032 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: lui a6, 24414 +; RV32I-ILP32E-NEXT: addi a6, a6, 284 +; RV32I-ILP32E-NEXT: add a6, sp, a6 +; RV32I-ILP32E-NEXT: sw a5, 0(a6) +; RV32I-ILP32E-NEXT: lui a5, 24414 +; RV32I-ILP32E-NEXT: addi a5, a5, 280 +; RV32I-ILP32E-NEXT: add a5, sp, a5 +; RV32I-ILP32E-NEXT: sw a4, 0(a5) +; RV32I-ILP32E-NEXT: lui a4, 24414 +; RV32I-ILP32E-NEXT: addi a4, a4, 276 +; RV32I-ILP32E-NEXT: add a4, sp, a4 +; RV32I-ILP32E-NEXT: sw a3, 0(a4) +; RV32I-ILP32E-NEXT: lui a3, 24414 +; RV32I-ILP32E-NEXT: addi a3, a3, 272 +; RV32I-ILP32E-NEXT: add a3, sp, a3 +; RV32I-ILP32E-NEXT: sw a2, 0(a3) +; RV32I-ILP32E-NEXT: lui a2, 24414 +; RV32I-ILP32E-NEXT: addi a2, a2, 268 +; RV32I-ILP32E-NEXT: add a2, sp, a2 +; RV32I-ILP32E-NEXT: sw a1, 0(a2) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 272 +; RV32I-ILP32E-NEXT: add a1, sp, a1 +; RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 288 +; RV32I-ILP32E-NEXT: add sp, sp, a1 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va_large_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -2044 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044 +; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 2020 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: lui a0, 24414 +; 
ILP32E-WITHFP-NEXT: addi a0, a0, -1748 +; ILP32E-WITHFP-NEXT: sub sp, sp, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: lui a2, 1024162 +; ILP32E-WITHFP-NEXT: addi a2, a2, -272 +; ILP32E-WITHFP-NEXT: add a2, s0, a2 +; ILP32E-WITHFP-NEXT: sw a1, 0(a2) +; ILP32E-WITHFP-NEXT: lui a1, 24414 +; ILP32E-WITHFP-NEXT: addi a1, a1, -1748 +; ILP32E-WITHFP-NEXT: add sp, sp, a1 +; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 2044 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s --- a/llvm/test/MC/RISCV/target-abi-invalid.s +++ b/llvm/test/MC/RISCV/target-abi-invalid.s @@ -30,7 +30,7 @@ # RUN: | FileCheck -check-prefix=RV32E-LP64 %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-LP64F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s # RV32I-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) @@ -39,6 +39,7 @@ # RV32E-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) +# RV32EFD-LP64D: LLVM ERROR: ILP32E must not be used with the D ISA extension # RUN: llvm-mc -triple=riscv32 -target-abi 
ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32I-ILP32F %s @@ -66,14 +67,16 @@ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32D %s # RV32E-ILP32: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EF-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EFD-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32F: LLVM ERROR: ILP32E must not be used with the D ISA extension # RV32EFD-ILP32D: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32D: LLVM ERROR: ILP32E must not be used with the D ISA extension nop