diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -98,8 +98,15 @@ } bool setABI(const std::string &Name) override { + if (Name == "ilp32e") { + ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S32"); + return true; + } + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128"); return true; } return false; diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -10309,8 +10309,8 @@ // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target // with soft float ABI has FLen==0). unsigned FLen; - static const int NumArgGPRs = 8; - static const int NumArgFPRs = 8; + const int NumArgGPRs; + const int NumArgFPRs; bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, @@ -10318,8 +10318,10 @@ CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, + bool EABI) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen), NumArgGPRs(EABI ? 6 : 8), + NumArgFPRs(FLen != 0 ? 8 : 0) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. @@ -10373,7 +10375,7 @@ // different for variadic arguments, we must also track whether we are // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? 
NumArgFPRs : 0; + int ArgFPRsLeft = NumArgFPRs; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; @@ -10742,8 +10744,9 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, - unsigned FLen) - : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} + unsigned FLen, bool EABI) + : TargetCodeGenInfo( + std::make_unique<RISCVABIInfo>(CGT, XLen, FLen, EABI)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -10964,7 +10967,8 @@ ABIFLen = 32; else if (ABIStr.endswith("d")) ABIFLen = 64; - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); + bool EABI = ABIStr.endswith("e"); + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen, EABI)); } case llvm::Triple::systemz: { diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c copy from clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c copy to clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c --- a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32e-ilp32f-ilp32d-abi.c @@ -5,9 +5,11 @@ // RUN: | FileCheck %s // RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \ // RUN: | FileCheck %s +// RUN: %clang_cc1 -triple riscv32 -target-abi ilp32e -emit-llvm %s -o - \ +// RUN: | FileCheck %s // This file contains test cases that will have the same output for the ilp32, -// ilp32f, and ilp32d ABIs. +// ilp32e, ilp32f, and ilp32d ABIs. #include <stddef.h> #include <stdint.h> @@ -194,42 +196,6 @@ return (v16i8){1, 2, 3, 4, 5, 6, 7, 8}; } -// Scalars passed on the stack should not have signext/zeroext attributes -// (they are anyext). 
- -// CHECK-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h) -int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c, - struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) { - return g + h; -} - -// Ensure that scalars passed on the stack are still determined correctly in -// the presence of large return values that consume a register due to the need -// to pass a pointer. - -// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g) -struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d, - uint8_t e, int8_t f, uint8_t g) { - return (struct large){a, e, f, g}; -} - -// CHECK-LABEL: define{{.*}} fp128 @f_scalar_stack_4(i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g) -long double f_scalar_stack_4(int32_t a, int64_t b, int64_t c, long double d, - uint8_t e, int8_t f, uint8_t g) { - return d; -} - -// Aggregates and >=XLen scalars passed on the stack should be lowered just as -// they would be if passed via registers. - -// CHECK-LABEL: define{{.*}} void @f_scalar_stack_5(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, float %g, double %h, fp128 %i) -void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e, - int64_t f, float g, double h, long double i) {} - -// CHECK-LABEL: define{{.*}} void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h) -void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e, - struct small f, struct small_aligned g, struct large h) {} - // Ensure that ABI lowering happens as expected for vararg calls. For RV32 // with the base integer calling convention there will be no observable // differences in the lowered IR for a call with varargs vs without. 
diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c --- a/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32-ilp32f-ilp32d-abi.c @@ -12,188 +12,22 @@ #include <stddef.h> #include <stdint.h> -// CHECK-LABEL: define{{.*}} void @f_void() -void f_void(void) {} - -// Scalar arguments and return values smaller than the word size are extended -// according to the sign of their type, up to 32 bits - -// CHECK-LABEL: define{{.*}} zeroext i1 @f_scalar_0(i1 zeroext %x) -_Bool f_scalar_0(_Bool x) { return x; } - -// CHECK-LABEL: define{{.*}} signext i8 @f_scalar_1(i8 signext %x) -int8_t f_scalar_1(int8_t x) { return x; } - -// CHECK-LABEL: define{{.*}} zeroext i8 @f_scalar_2(i8 zeroext %x) -uint8_t f_scalar_2(uint8_t x) { return x; } - -// CHECK-LABEL: define{{.*}} i32 @f_scalar_3(i32 %x) -int32_t f_scalar_3(int32_t x) { return x; } - -// CHECK-LABEL: define{{.*}} i64 @f_scalar_4(i64 %x) -int64_t f_scalar_4(int64_t x) { return x; } - -#ifdef __SIZEOF_INT128__ -// CHECK-FORCEINT128-LABEL: define{{.*}} i128 @f_scalar_5(i128 %x) -__int128_t f_scalar_5(__int128_t x) { return x; } -#endif - -// CHECK-LABEL: define{{.*}} float @f_fp_scalar_1(float %x) -float f_fp_scalar_1(float x) { return x; } - -// CHECK-LABEL: define{{.*}} double @f_fp_scalar_2(double %x) -double f_fp_scalar_2(double x) { return x; } - -// Scalars larger than 2*xlen are passed/returned indirect. However, the -// RISC-V LLVM backend can handle this fine, so the function doesn't need to -// be modified. - -// CHECK-LABEL: define{{.*}} fp128 @f_fp_scalar_3(fp128 %x) -long double f_fp_scalar_3(long double x) { return x; } - -// Empty structs or unions are ignored. 
- -struct empty_s {}; - -// CHECK-LABEL: define{{.*}} void @f_agg_empty_struct() -struct empty_s f_agg_empty_struct(struct empty_s x) { - return x; -} - -union empty_u {}; - -// CHECK-LABEL: define{{.*}} void @f_agg_empty_union() -union empty_u f_agg_empty_union(union empty_u x) { - return x; -} - -// Aggregates <= 2*xlen may be passed in registers, so will be coerced to -// integer arguments. The rules for return are the same. - struct tiny { uint8_t a, b, c, d; }; -// CHECK-LABEL: define{{.*}} void @f_agg_tiny(i32 %x.coerce) -void f_agg_tiny(struct tiny x) { - x.a += x.b; - x.c += x.d; -} - -// CHECK-LABEL: define{{.*}} i32 @f_agg_tiny_ret() -struct tiny f_agg_tiny_ret() { - return (struct tiny){1, 2, 3, 4}; -} - -typedef uint8_t v4i8 __attribute__((vector_size(4))); -typedef int32_t v1i32 __attribute__((vector_size(4))); - -// CHECK-LABEL: define{{.*}} void @f_vec_tiny_v4i8(i32 %x.coerce) -void f_vec_tiny_v4i8(v4i8 x) { - x[0] = x[1]; - x[2] = x[3]; -} - -// CHECK-LABEL: define{{.*}} i32 @f_vec_tiny_v4i8_ret() -v4i8 f_vec_tiny_v4i8_ret() { - return (v4i8){1, 2, 3, 4}; -} - -// CHECK-LABEL: define{{.*}} void @f_vec_tiny_v1i32(i32 %x.coerce) -void f_vec_tiny_v1i32(v1i32 x) { - x[0] = 114; -} - -// CHECK-LABEL: define{{.*}} i32 @f_vec_tiny_v1i32_ret() -v1i32 f_vec_tiny_v1i32_ret() { - return (v1i32){1}; -} - struct small { int32_t a, *b; }; -// CHECK-LABEL: define{{.*}} void @f_agg_small([2 x i32] %x.coerce) -void f_agg_small(struct small x) { - x.a += *x.b; - x.b = &x.a; -} - -// CHECK-LABEL: define{{.*}} [2 x i32] @f_agg_small_ret() -struct small f_agg_small_ret() { - return (struct small){1, 0}; -} - -typedef uint8_t v8i8 __attribute__((vector_size(8))); -typedef int64_t v1i64 __attribute__((vector_size(8))); - -// CHECK-LABEL: define{{.*}} void @f_vec_small_v8i8(i64 %x.coerce) -void f_vec_small_v8i8(v8i8 x) { - x[0] = x[7]; -} - -// CHECK-LABEL: define{{.*}} i64 @f_vec_small_v8i8_ret() -v8i8 f_vec_small_v8i8_ret() { - return (v8i8){1, 2, 3, 4, 5, 6, 7, 8}; -} - 
-// CHECK-LABEL: define{{.*}} void @f_vec_small_v1i64(i64 %x.coerce) -void f_vec_small_v1i64(v1i64 x) { - x[0] = 114; -} - -// CHECK-LABEL: define{{.*}} i64 @f_vec_small_v1i64_ret() -v1i64 f_vec_small_v1i64_ret() { - return (v1i64){1}; -} - -// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a -// single 2*xlen-sized argument, to ensure that alignment can be maintained if -// passed on the stack. - struct small_aligned { int64_t a; }; -// CHECK-LABEL: define{{.*}} void @f_agg_small_aligned(i64 %x.coerce) -void f_agg_small_aligned(struct small_aligned x) { - x.a += x.a; -} - -// CHECK-LABEL: define{{.*}} i64 @f_agg_small_aligned_ret(i64 %x.coerce) -struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) { - return (struct small_aligned){10}; -} - -// Aggregates greater > 2*xlen will be passed and returned indirectly struct large { int32_t a, b, c, d; }; -// CHECK-LABEL: define{{.*}} void @f_agg_large(%struct.large* %x) -void f_agg_large(struct large x) { - x.a = x.b + x.c + x.d; -} - -// The address where the struct should be written to will be the first -// argument -// CHECK-LABEL: define{{.*}} void @f_agg_large_ret(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %i, i8 signext %j) -struct large f_agg_large_ret(int32_t i, int8_t j) { - return (struct large){1, 2, 3, 4}; -} - -typedef unsigned char v16i8 __attribute__((vector_size(16))); - -// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8(<16 x i8>* %0) -void f_vec_large_v16i8(v16i8 x) { - x[0] = x[7]; -} - -// CHECK-LABEL: define{{.*}} void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret(<16 x i8>) align 16 %agg.result) -v16i8 f_vec_large_v16i8_ret() { - return (v16i8){1, 2, 3, 4, 5, 6, 7, 8}; -} - // Scalars passed on the stack should not have signext/zeroext attributes // (they are anyext). 
diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c b/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32e-abi.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e -fforce-enable-int128 %s -o - \ +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128 + +// This file contains test cases for only ilp32e. + +#include <stddef.h> +#include <stdint.h> + +struct tiny { + uint8_t a, b, c, d; +}; + +struct small { + int32_t a, *b; +}; + +struct small_aligned { + int64_t a; +}; + +struct large { + int32_t a, b, c, d; +}; + +// Scalars passed on the stack should not have signext/zeroext attributes +// (they are anyext). + +// CHECK-LABEL: define{{.*}} i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, %struct.large* %c, i8 zeroext %d, i8 signext %e, i8 %f, i8 %g) +int f_scalar_stack_1(struct tiny a, struct small b, struct large c, + uint8_t d, int8_t e, uint8_t f, int8_t g) { + return f + g; +} + +// Ensure that scalars passed on the stack are still determined correctly in +// the presence of large return values that consume a register due to the need +// to pass a pointer. + +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_2(%struct.large* noalias sret(%struct.large) align 4 %agg.result, i32 %a, i64 %b, fp128 %c, i8 zeroext %d, i8 %e, i8 %f) +struct large f_scalar_stack_2(int32_t a, int64_t b, long double c, + uint8_t d, int8_t e, uint8_t f) { + return (struct large){a, d, e, f}; +} + +// CHECK-LABEL: define{{.*}} fp128 @f_scalar_stack_4(i32 %a, i64 %b, fp128 %c, i8 zeroext %d, i8 %e, i8 %f) +long double f_scalar_stack_4(int32_t a, int64_t b, long double c, + uint8_t d, int8_t e, uint8_t f) { + return c; +} + +// Aggregates and >=XLen scalars passed on the stack should be lowered just as +// they would be if passed via registers. 
+ +// CHECK-LABEL: define{{.*}} void @f_scalar_stack_5(double %a, i64 %b, double %c, i32 %d, i64 %e, float %f, double %g, fp128 %h) +void f_scalar_stack_5(double a, int64_t b, double c, int d, + int64_t e, float f, double g, long double h) {} + +// CHECK-LABEL: define{{.*}} void @f_agg_stack(double %a, i64 %b, double %c, i32 %d.coerce, [2 x i32] %e.coerce, i64 %f.coerce, %struct.large* %g) +void f_agg_stack(double a, int64_t b, double c, struct tiny d, + struct small e, struct small_aligned f, struct large g) {} diff --git a/clang/test/Driver/riscv-abi.c b/clang/test/Driver/riscv-abi.c --- a/clang/test/Driver/riscv-abi.c +++ b/clang/test/Driver/riscv-abi.c @@ -23,6 +23,13 @@ // CHECK-ILP32D: "-target-abi" "ilp32d" +// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o -mabi=ilp32e 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ILP32E %s +// RUN: %clang -target riscv32-unknown-elf -x assembler %s -### -o %t.o \ +// RUN: -mabi=ilp32e 2>&1 | FileCheck -check-prefix=CHECK-ILP32E %s + +// CHECK-ILP32E: "-target-abi" "ilp32e" + // RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=lp64 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-RV32-LP64 %s diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -13,8 +13,10 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). 
+def CSR_ILP32E : CalleeSavedRegs<(add X1, X3, X4, X8, X9)>; + def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add CSR_ILP32E, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -27,8 +29,13 @@ // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; -// Interrupt handler needs to save/restore all registers that are used, -// both Caller and Callee saved registers. +// Interrupt service routines need to save/restore all physical registers that +// are used, both Caller and Callee saved registers. +// +// The only physical register that shouldn't be saved is x2 (sp), which is +// managed by the prolog/epilog inserter, even for interrupt service routines. + +// All 32-bit GP registers, excluding x0 (zero) and x2 (sp). def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 9), (sequence "X%u", 10, 11), diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -21,11 +21,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/Align(16), - /*LocalAreaOffset=*/0), - STI(STI) {} + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -23,6 +23,19 @@ using namespace llvm; +static Align getABIStackAlignment(RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return 
Align(4); + + return Align(16); +} + +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + getABIStackAlignment(STI.getTargetABI()), + /*LocalAreaOffset=*/0), + STI(STI) {} + // For now we use x18, a.k.a s2, as pointer to shadow call stack. // User should explicitly set -ffixed-x18 and not use x18 in their asm. static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, @@ -217,12 +230,27 @@ } bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MF.getTarget().Options.DisableFramePointerElim(MF) || - RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || - MFI.isFrameAddressTaken(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + // If eliminating the frame pointer is disabled, then we have a frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) + return true; + + // If we take the address of the frame, then we need to store that address + // somewhere. + if (MFI.isFrameAddressTaken()) + return true; + + // We need to save the frame pointer to realign the stack. + if (TRI->needsStackRealignment(MF)) + return true; + + // We need to save the frame pointer to allocate a variable-sized object. 
+ if (MFI.hasVarSizedObjects()) + return true; + + return false; } bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -70,6 +70,7 @@ default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -2332,10 +2333,16 @@ // register-size fields in the same situations they would be for fixed // arguments. -static const MCPhysReg ArgGPRs[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, - RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 -}; +// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except +// the ILP32E ABI. +static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; +// The GPRs used for passing arguments in the ILP32E ABI. +static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; + +// The FPRs used for passing arguments in the ILP32F and LP64F ABIs. static const MCPhysReg ArgFPR16s[] = { RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H @@ -2344,6 +2351,7 @@ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; +// The FPRs used for passing arguments in the ILP32D and LP64D ABIs. 
static const MCPhysReg ArgFPR64s[] = { RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D @@ -2359,6 +2367,20 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V16M4, RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V16M8}; +static ArrayRef<MCPhysReg> getCallingConvArgGPRs(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return makeArrayRef(ArgEGPRs); + + return makeArrayRef(ArgIGPRs); +} + +static Register getCallingConvLastArgGPR(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return RISCV::X15; + + return RISCV::X17; +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, @@ -2366,6 +2388,10 @@ MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget<RISCVSubtarget>(); + ArrayRef<MCPhysReg> ArgGPRs = getCallingConvArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, @@ -2424,6 +2450,7 @@ default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: break; case RISCVABI::ABI_ILP32F: @@ -2455,6 +2482,8 @@ LocInfo = CCValAssign::BCvt; } + ArrayRef<MCPhysReg> ArgGPRs = getCallingConvArgGPRs(ABI); + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -2467,7 +2496,7 @@ DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. 
- if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) + if (RegIdx != ArgGPRs.size() && RegIdx % 2 == 1) State.AllocateReg(ArgGPRs); } @@ -2772,6 +2801,7 @@ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); if (VA.isMemLoc()) { // f64 is passed on the stack. @@ -2787,7 +2817,7 @@ RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; - if (VA.getLocReg() == RISCV::X17) { + if (VA.getLocReg() == getCallingConvLastArgGPR(STI.getTargetABI())) { // Second half of f64 is passed on the stack. int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); @@ -2953,6 +2983,7 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); unsigned XLenInBytes = Subtarget.getXLen() / 8; + RISCVABI::ABI ABI = Subtarget.getTargetABI(); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -3002,7 +3033,7 @@ } if (IsVarArg) { - ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); + ArrayRef<MCPhysReg> ArgRegs = getCallingConvArgGPRs(ABI); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3229,7 +3260,7 @@ Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); - if (RegLo == RISCV::X17) { + if (RegLo == getCallingConvLastArgGPR(Subtarget.getTargetABI())) { // Second half of f64 is passed on the stack. // Work out the address of the stack slot. 
if (!StackPtr.getNode()) @@ -3401,9 +3432,9 @@ Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); + assert(VA.getLocReg() == RISCV::X10 && "Unexpected reg assignment"); SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + DAG.getCopyFromReg(Chain, DL, RISCV::X11, MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -61,6 +61,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_SaveList; @@ -74,12 +76,13 @@ } BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); const RISCVFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); // Mark any registers requested to be reserved as such for (size_t Reg = 0; Reg < getNumRegs(); Reg++) { - if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg)) + if (STI.isRegisterReservedByUser(Reg)) markSuperRegs(Reserved, Reg); } @@ -95,6 +98,15 @@ if (TFI->hasBP(MF)) markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp + // With ilp32e and the D extension, we have to reserve `fp` in case we need it + // later for stack realignment if a FPR64 gets spilled. This decision is made + // separately to `hasFP`, so that we don't insert `fp` manipulation code into + // functions that do not require it. 
+ const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.hasCalls() && (STI.getTargetABI() == RISCVABI::ABI_ILP32E) && + STI.hasStdExtD()) + markSuperRegs(Reserved, RISCV::X8); // fp + // V registers for code generation. We handle them manually. markSuperRegs(Reserved, RISCV::VL); markSuperRegs(Reserved, RISCV::VTYPE); @@ -211,6 +223,8 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + return CSR_ILP32E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_RegMask; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -42,10 +42,16 @@ initializeRISCVCleanupVSETVLIPass(*PR); } -static StringRef computeDataLayout(const Triple &TT) { +static StringRef computeDataLayout(const Triple &TT, + const TargetOptions &Options) { if (TT.isArch64Bit()) return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + + StringRef ABIName = Options.MCOptions.getABIName(); + if (ABIName == "ilp32e") + return "e-m:e-p:32:32-i64:64-n32-S32"; + return "e-m:e-p:32:32-i64:64-n32-S128"; } @@ -62,7 +68,7 @@ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + : LLVMTargetMachine(T, computeDataLayout(TT, Options), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique<RISCVELFTargetObjectFile>()) { diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < 
%s \ ; RUN: | FileCheck %s -check-prefix=ILP32 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=LP64 ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ @@ -14,8 +16,8 @@ @var = global [32 x float] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns ; something appropriate. @@ -91,6 +93,76 @@ ; ILP32-NEXT: fsw ft0, %lo(var)(a0) ; ILP32-NEXT: ret ; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0: +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: flw ft0, %lo(var)(a0) +; ILP32E-NEXT: flw ft1, %lo(var+4)(a0) +; ILP32E-NEXT: flw ft2, %lo(var+8)(a0) +; ILP32E-NEXT: flw ft3, %lo(var+12)(a0) +; ILP32E-NEXT: addi a1, a0, %lo(var) +; ILP32E-NEXT: flw ft4, 16(a1) +; ILP32E-NEXT: flw ft5, 20(a1) +; ILP32E-NEXT: flw ft6, 24(a1) +; ILP32E-NEXT: flw ft7, 28(a1) +; ILP32E-NEXT: flw fa0, 32(a1) +; ILP32E-NEXT: flw fa1, 36(a1) +; ILP32E-NEXT: flw fa2, 40(a1) +; ILP32E-NEXT: flw fa3, 44(a1) +; ILP32E-NEXT: flw fa4, 48(a1) +; ILP32E-NEXT: flw fa5, 52(a1) +; ILP32E-NEXT: flw fa6, 56(a1) +; ILP32E-NEXT: flw fa7, 60(a1) +; ILP32E-NEXT: flw ft8, 64(a1) +; ILP32E-NEXT: flw ft9, 68(a1) +; ILP32E-NEXT: flw ft10, 72(a1) +; ILP32E-NEXT: flw ft11, 76(a1) +; ILP32E-NEXT: flw fs0, 80(a1) +; ILP32E-NEXT: flw fs1, 84(a1) +; ILP32E-NEXT: flw fs2, 88(a1) +; ILP32E-NEXT: flw fs3, 92(a1) +; ILP32E-NEXT: flw fs4, 96(a1) +; ILP32E-NEXT: flw fs5, 100(a1) +; ILP32E-NEXT: flw fs6, 104(a1) +; ILP32E-NEXT: flw fs7, 108(a1) +; ILP32E-NEXT: flw fs8, 
124(a1) +; ILP32E-NEXT: flw fs9, 120(a1) +; ILP32E-NEXT: flw fs10, 116(a1) +; ILP32E-NEXT: flw fs11, 112(a1) +; ILP32E-NEXT: fsw fs8, 124(a1) +; ILP32E-NEXT: fsw fs9, 120(a1) +; ILP32E-NEXT: fsw fs10, 116(a1) +; ILP32E-NEXT: fsw fs11, 112(a1) +; ILP32E-NEXT: fsw fs7, 108(a1) +; ILP32E-NEXT: fsw fs6, 104(a1) +; ILP32E-NEXT: fsw fs5, 100(a1) +; ILP32E-NEXT: fsw fs4, 96(a1) +; ILP32E-NEXT: fsw fs3, 92(a1) +; ILP32E-NEXT: fsw fs2, 88(a1) +; ILP32E-NEXT: fsw fs1, 84(a1) +; ILP32E-NEXT: fsw fs0, 80(a1) +; ILP32E-NEXT: fsw ft11, 76(a1) +; ILP32E-NEXT: fsw ft10, 72(a1) +; ILP32E-NEXT: fsw ft9, 68(a1) +; ILP32E-NEXT: fsw ft8, 64(a1) +; ILP32E-NEXT: fsw fa7, 60(a1) +; ILP32E-NEXT: fsw fa6, 56(a1) +; ILP32E-NEXT: fsw fa5, 52(a1) +; ILP32E-NEXT: fsw fa4, 48(a1) +; ILP32E-NEXT: fsw fa3, 44(a1) +; ILP32E-NEXT: fsw fa2, 40(a1) +; ILP32E-NEXT: fsw fa1, 36(a1) +; ILP32E-NEXT: fsw fa0, 32(a1) +; ILP32E-NEXT: fsw ft7, 28(a1) +; ILP32E-NEXT: fsw ft6, 24(a1) +; ILP32E-NEXT: fsw ft5, 20(a1) +; ILP32E-NEXT: fsw ft4, 16(a1) +; ILP32E-NEXT: fsw ft3, %lo(var+12)(a0) +; ILP32E-NEXT: fsw ft2, %lo(var+8)(a0) +; ILP32E-NEXT: fsw ft1, %lo(var+4)(a0) +; ILP32E-NEXT: fsw ft0, %lo(var)(a0) +; ILP32E-NEXT: ret +; ; LP64-LABEL: callee: ; LP64: # %bb.0: ; LP64-NEXT: lui a0, %hi(var) @@ -700,6 +772,149 @@ ; ILP32-NEXT: addi sp, sp, 144 ; ILP32-NEXT: ret ; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NEXT: addi sp, sp, -140 +; ILP32E-NEXT: sw ra, 136(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 132(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s1, 128(sp) # 4-byte Folded Spill +; ILP32E-NEXT: lui s0, %hi(var) +; ILP32E-NEXT: flw ft0, %lo(var)(s0) +; ILP32E-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+4)(s0) +; ILP32E-NEXT: fsw ft0, 120(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+8)(s0) +; ILP32E-NEXT: fsw ft0, 116(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, %lo(var+12)(s0) +; ILP32E-NEXT: fsw ft0, 112(sp) # 4-byte Folded Spill +; 
ILP32E-NEXT: addi s1, s0, %lo(var) +; ILP32E-NEXT: flw ft0, 16(s1) +; ILP32E-NEXT: fsw ft0, 108(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 20(s1) +; ILP32E-NEXT: fsw ft0, 104(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 24(s1) +; ILP32E-NEXT: fsw ft0, 100(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 28(s1) +; ILP32E-NEXT: fsw ft0, 96(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 32(s1) +; ILP32E-NEXT: fsw ft0, 92(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 36(s1) +; ILP32E-NEXT: fsw ft0, 88(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 40(s1) +; ILP32E-NEXT: fsw ft0, 84(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 44(s1) +; ILP32E-NEXT: fsw ft0, 80(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 48(s1) +; ILP32E-NEXT: fsw ft0, 76(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 52(s1) +; ILP32E-NEXT: fsw ft0, 72(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 56(s1) +; ILP32E-NEXT: fsw ft0, 68(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 60(s1) +; ILP32E-NEXT: fsw ft0, 64(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 64(s1) +; ILP32E-NEXT: fsw ft0, 60(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 68(s1) +; ILP32E-NEXT: fsw ft0, 56(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 72(s1) +; ILP32E-NEXT: fsw ft0, 52(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 76(s1) +; ILP32E-NEXT: fsw ft0, 48(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 80(s1) +; ILP32E-NEXT: fsw ft0, 44(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 84(s1) +; ILP32E-NEXT: fsw ft0, 40(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 88(s1) +; ILP32E-NEXT: fsw ft0, 36(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 92(s1) +; ILP32E-NEXT: fsw ft0, 32(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 96(s1) +; ILP32E-NEXT: fsw ft0, 28(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 100(s1) +; ILP32E-NEXT: fsw ft0, 24(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 104(s1) +; ILP32E-NEXT: fsw ft0, 20(sp) # 
4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 108(s1) +; ILP32E-NEXT: fsw ft0, 16(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 112(s1) +; ILP32E-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 116(s1) +; ILP32E-NEXT: fsw ft0, 8(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 120(s1) +; ILP32E-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw ft0, 124(s1) +; ILP32E-NEXT: fsw ft0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: call callee@plt +; ILP32E-NEXT: flw ft0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 124(s1) +; ILP32E-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 120(s1) +; ILP32E-NEXT: flw ft0, 8(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 116(s1) +; ILP32E-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 112(s1) +; ILP32E-NEXT: flw ft0, 16(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 108(s1) +; ILP32E-NEXT: flw ft0, 20(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 104(s1) +; ILP32E-NEXT: flw ft0, 24(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 100(s1) +; ILP32E-NEXT: flw ft0, 28(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 96(s1) +; ILP32E-NEXT: flw ft0, 32(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 92(s1) +; ILP32E-NEXT: flw ft0, 36(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 88(s1) +; ILP32E-NEXT: flw ft0, 40(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 84(s1) +; ILP32E-NEXT: flw ft0, 44(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 80(s1) +; ILP32E-NEXT: flw ft0, 48(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 76(s1) +; ILP32E-NEXT: flw ft0, 52(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 72(s1) +; ILP32E-NEXT: flw ft0, 56(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 68(s1) +; ILP32E-NEXT: flw ft0, 60(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 64(s1) +; ILP32E-NEXT: flw ft0, 64(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 60(s1) +; ILP32E-NEXT: flw ft0, 68(sp) 
# 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 56(s1) +; ILP32E-NEXT: flw ft0, 72(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 52(s1) +; ILP32E-NEXT: flw ft0, 76(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 48(s1) +; ILP32E-NEXT: flw ft0, 80(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 44(s1) +; ILP32E-NEXT: flw ft0, 84(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 40(s1) +; ILP32E-NEXT: flw ft0, 88(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 36(s1) +; ILP32E-NEXT: flw ft0, 92(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 32(s1) +; ILP32E-NEXT: flw ft0, 96(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 28(s1) +; ILP32E-NEXT: flw ft0, 100(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 24(s1) +; ILP32E-NEXT: flw ft0, 104(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 20(s1) +; ILP32E-NEXT: flw ft0, 108(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, 16(s1) +; ILP32E-NEXT: flw ft0, 112(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+12)(s0) +; ILP32E-NEXT: flw ft0, 116(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+8)(s0) +; ILP32E-NEXT: flw ft0, 120(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var+4)(s0) +; ILP32E-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw ft0, %lo(var)(s0) +; ILP32E-NEXT: lw s1, 128(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 132(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw ra, 136(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 140 +; ILP32E-NEXT: ret +; ; LP64-LABEL: caller: ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -160 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s 
-check-prefix=ILP32 +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=LP64 ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs < %s \ @@ -10,11 +12,12 @@ @var = global [32 x double] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns -; something appropriate. +; something appropriate. The checks ensure the function is saving the right +; registers to the stack before loading from @var. define void @callee() nounwind { ; ILP32-LABEL: callee: @@ -87,6 +90,76 @@ ; ILP32-NEXT: fsd ft0, %lo(var)(a0) ; ILP32-NEXT: ret ; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0: +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: fld ft0, %lo(var)(a0) +; ILP32E-NEXT: fld ft1, %lo(var+8)(a0) +; ILP32E-NEXT: addi a1, a0, %lo(var) +; ILP32E-NEXT: fld ft2, 16(a1) +; ILP32E-NEXT: fld ft3, 24(a1) +; ILP32E-NEXT: fld ft4, 32(a1) +; ILP32E-NEXT: fld ft5, 40(a1) +; ILP32E-NEXT: fld ft6, 48(a1) +; ILP32E-NEXT: fld ft7, 56(a1) +; ILP32E-NEXT: fld fa0, 64(a1) +; ILP32E-NEXT: fld fa1, 72(a1) +; ILP32E-NEXT: fld fa2, 80(a1) +; ILP32E-NEXT: fld fa3, 88(a1) +; ILP32E-NEXT: fld fa4, 96(a1) +; ILP32E-NEXT: fld fa5, 104(a1) +; ILP32E-NEXT: fld fa6, 112(a1) +; ILP32E-NEXT: fld fa7, 120(a1) +; ILP32E-NEXT: fld ft8, 128(a1) +; ILP32E-NEXT: fld ft9, 136(a1) +; ILP32E-NEXT: fld ft10, 144(a1) +; ILP32E-NEXT: fld ft11, 152(a1) +; ILP32E-NEXT: fld fs0, 160(a1) +; ILP32E-NEXT: fld fs1, 168(a1) +; ILP32E-NEXT: fld fs2, 176(a1) +; ILP32E-NEXT: fld fs3, 184(a1) +; 
ILP32E-NEXT: fld fs4, 192(a1) +; ILP32E-NEXT: fld fs5, 200(a1) +; ILP32E-NEXT: fld fs6, 208(a1) +; ILP32E-NEXT: fld fs7, 216(a1) +; ILP32E-NEXT: fld fs8, 248(a1) +; ILP32E-NEXT: fld fs9, 240(a1) +; ILP32E-NEXT: fld fs10, 232(a1) +; ILP32E-NEXT: fld fs11, 224(a1) +; ILP32E-NEXT: fsd fs8, 248(a1) +; ILP32E-NEXT: fsd fs9, 240(a1) +; ILP32E-NEXT: fsd fs10, 232(a1) +; ILP32E-NEXT: fsd fs11, 224(a1) +; ILP32E-NEXT: fsd fs7, 216(a1) +; ILP32E-NEXT: fsd fs6, 208(a1) +; ILP32E-NEXT: fsd fs5, 200(a1) +; ILP32E-NEXT: fsd fs4, 192(a1) +; ILP32E-NEXT: fsd fs3, 184(a1) +; ILP32E-NEXT: fsd fs2, 176(a1) +; ILP32E-NEXT: fsd fs1, 168(a1) +; ILP32E-NEXT: fsd fs0, 160(a1) +; ILP32E-NEXT: fsd ft11, 152(a1) +; ILP32E-NEXT: fsd ft10, 144(a1) +; ILP32E-NEXT: fsd ft9, 136(a1) +; ILP32E-NEXT: fsd ft8, 128(a1) +; ILP32E-NEXT: fsd fa7, 120(a1) +; ILP32E-NEXT: fsd fa6, 112(a1) +; ILP32E-NEXT: fsd fa5, 104(a1) +; ILP32E-NEXT: fsd fa4, 96(a1) +; ILP32E-NEXT: fsd fa3, 88(a1) +; ILP32E-NEXT: fsd fa2, 80(a1) +; ILP32E-NEXT: fsd fa1, 72(a1) +; ILP32E-NEXT: fsd fa0, 64(a1) +; ILP32E-NEXT: fsd ft7, 56(a1) +; ILP32E-NEXT: fsd ft6, 48(a1) +; ILP32E-NEXT: fsd ft5, 40(a1) +; ILP32E-NEXT: fsd ft4, 32(a1) +; ILP32E-NEXT: fsd ft3, 24(a1) +; ILP32E-NEXT: fsd ft2, 16(a1) +; ILP32E-NEXT: fsd ft1, %lo(var+8)(a0) +; ILP32E-NEXT: fsd ft0, %lo(var)(a0) +; ILP32E-NEXT: ret +; ; LP64-LABEL: callee: ; LP64: # %bb.0: ; LP64-NEXT: lui a0, %hi(var) @@ -504,6 +577,153 @@ ; ILP32-NEXT: addi sp, sp, 272 ; ILP32-NEXT: ret ; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NEXT: addi sp, sp, -272 +; ILP32E-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s1, 260(sp) # 4-byte Folded Spill +; ILP32E-NEXT: addi s0, sp, 272 +; ILP32E-NEXT: andi sp, sp, -8 +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: fld ft0, %lo(var)(a0) +; ILP32E-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, %lo(var+8)(a0) +; ILP32E-NEXT: fsd ft0, 240(sp) # 8-byte 
Folded Spill +; ILP32E-NEXT: addi s1, a0, %lo(var) +; ILP32E-NEXT: fld ft0, 16(s1) +; ILP32E-NEXT: fsd ft0, 232(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 24(s1) +; ILP32E-NEXT: fsd ft0, 224(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 32(s1) +; ILP32E-NEXT: fsd ft0, 216(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 40(s1) +; ILP32E-NEXT: fsd ft0, 208(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 48(s1) +; ILP32E-NEXT: fsd ft0, 200(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 56(s1) +; ILP32E-NEXT: fsd ft0, 192(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 64(s1) +; ILP32E-NEXT: fsd ft0, 184(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 72(s1) +; ILP32E-NEXT: fsd ft0, 176(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 80(s1) +; ILP32E-NEXT: fsd ft0, 168(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 88(s1) +; ILP32E-NEXT: fsd ft0, 160(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 96(s1) +; ILP32E-NEXT: fsd ft0, 152(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 104(s1) +; ILP32E-NEXT: fsd ft0, 144(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 112(s1) +; ILP32E-NEXT: fsd ft0, 136(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 120(s1) +; ILP32E-NEXT: fsd ft0, 128(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 128(s1) +; ILP32E-NEXT: fsd ft0, 120(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 136(s1) +; ILP32E-NEXT: fsd ft0, 112(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 144(s1) +; ILP32E-NEXT: fsd ft0, 104(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 152(s1) +; ILP32E-NEXT: fsd ft0, 96(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 160(s1) +; ILP32E-NEXT: fsd ft0, 88(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 168(s1) +; ILP32E-NEXT: fsd ft0, 80(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 176(s1) +; ILP32E-NEXT: fsd ft0, 72(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 184(s1) +; ILP32E-NEXT: fsd ft0, 64(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 
192(s1) +; ILP32E-NEXT: fsd ft0, 56(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 200(s1) +; ILP32E-NEXT: fsd ft0, 48(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 208(s1) +; ILP32E-NEXT: fsd ft0, 40(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 216(s1) +; ILP32E-NEXT: fsd ft0, 32(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 224(s1) +; ILP32E-NEXT: fsd ft0, 24(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 232(s1) +; ILP32E-NEXT: fsd ft0, 16(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 240(s1) +; ILP32E-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill +; ILP32E-NEXT: fld ft0, 248(s1) +; ILP32E-NEXT: fsd ft0, 0(sp) # 8-byte Folded Spill +; ILP32E-NEXT: call callee@plt +; ILP32E-NEXT: fld ft0, 0(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 248(s1) +; ILP32E-NEXT: fld ft0, 8(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 240(s1) +; ILP32E-NEXT: fld ft0, 16(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 232(s1) +; ILP32E-NEXT: fld ft0, 24(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 224(s1) +; ILP32E-NEXT: fld ft0, 32(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 216(s1) +; ILP32E-NEXT: fld ft0, 40(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 208(s1) +; ILP32E-NEXT: fld ft0, 48(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 200(s1) +; ILP32E-NEXT: fld ft0, 56(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 192(s1) +; ILP32E-NEXT: fld ft0, 64(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 184(s1) +; ILP32E-NEXT: fld ft0, 72(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 176(s1) +; ILP32E-NEXT: fld ft0, 80(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 168(s1) +; ILP32E-NEXT: fld ft0, 88(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 160(s1) +; ILP32E-NEXT: fld ft0, 96(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 152(s1) +; ILP32E-NEXT: fld ft0, 104(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 144(s1) +; ILP32E-NEXT: fld ft0, 112(sp) # 8-byte Folded Reload +; 
ILP32E-NEXT: fsd ft0, 136(s1) +; ILP32E-NEXT: fld ft0, 120(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 128(s1) +; ILP32E-NEXT: fld ft0, 128(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 120(s1) +; ILP32E-NEXT: fld ft0, 136(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 112(s1) +; ILP32E-NEXT: fld ft0, 144(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 104(s1) +; ILP32E-NEXT: fld ft0, 152(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 96(s1) +; ILP32E-NEXT: fld ft0, 160(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 88(s1) +; ILP32E-NEXT: fld ft0, 168(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 80(s1) +; ILP32E-NEXT: fld ft0, 176(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 72(s1) +; ILP32E-NEXT: fld ft0, 184(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 64(s1) +; ILP32E-NEXT: fld ft0, 192(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 56(s1) +; ILP32E-NEXT: fld ft0, 200(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 48(s1) +; ILP32E-NEXT: fld ft0, 208(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 40(s1) +; ILP32E-NEXT: fld ft0, 216(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 32(s1) +; ILP32E-NEXT: fld ft0, 224(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 24(s1) +; ILP32E-NEXT: fld ft0, 232(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, 16(s1) +; ILP32E-NEXT: fld ft0, 240(sp) # 8-byte Folded Reload +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: fsd ft0, %lo(var+8)(a0) +; ILP32E-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; ILP32E-NEXT: fsd ft0, %lo(var)(a0) +; ILP32E-NEXT: addi sp, s0, -272 +; ILP32E-NEXT: lw s1, 260(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 272 +; ILP32E-NEXT: ret +; ; LP64-LABEL: caller: ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -288 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll 
b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \ @@ -136,6 +138,96 @@ ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: callee: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -36 +; RV32I-ILP32E-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a7, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(a5) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(a5) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw t4, 24(a5) +; RV32I-ILP32E-NEXT: lw t5, 28(a5) +; RV32I-ILP32E-NEXT: lw t6, 32(a5) +; RV32I-ILP32E-NEXT: lw s2, 36(a5) +; RV32I-ILP32E-NEXT: lw s3, 40(a5) +; RV32I-ILP32E-NEXT: lw s4, 44(a5) +; RV32I-ILP32E-NEXT: lw s5, 48(a5) +; RV32I-ILP32E-NEXT: lw s6, 52(a5) +; 
RV32I-ILP32E-NEXT: lw s7, 56(a5) +; RV32I-ILP32E-NEXT: lw s8, 60(a5) +; RV32I-ILP32E-NEXT: lw s9, 64(a5) +; RV32I-ILP32E-NEXT: lw s10, 68(a5) +; RV32I-ILP32E-NEXT: lw s11, 72(a5) +; RV32I-ILP32E-NEXT: lw ra, 76(a5) +; RV32I-ILP32E-NEXT: lw t3, 80(a5) +; RV32I-ILP32E-NEXT: lw t2, 84(a5) +; RV32I-ILP32E-NEXT: lw t1, 88(a5) +; RV32I-ILP32E-NEXT: lw s0, 92(a5) +; RV32I-ILP32E-NEXT: lw s1, 96(a5) +; RV32I-ILP32E-NEXT: lw t0, 100(a5) +; RV32I-ILP32E-NEXT: lw a6, 104(a5) +; RV32I-ILP32E-NEXT: lw a4, 108(a5) +; RV32I-ILP32E-NEXT: lw a0, 124(a5) +; RV32I-ILP32E-NEXT: lw a1, 120(a5) +; RV32I-ILP32E-NEXT: lw a2, 116(a5) +; RV32I-ILP32E-NEXT: lw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a0, 124(a5) +; RV32I-ILP32E-NEXT: sw a1, 120(a5) +; RV32I-ILP32E-NEXT: sw a2, 116(a5) +; RV32I-ILP32E-NEXT: sw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a4, 108(a5) +; RV32I-ILP32E-NEXT: sw a6, 104(a5) +; RV32I-ILP32E-NEXT: sw t0, 100(a5) +; RV32I-ILP32E-NEXT: sw s1, 96(a5) +; RV32I-ILP32E-NEXT: sw s0, 92(a5) +; RV32I-ILP32E-NEXT: sw t1, 88(a5) +; RV32I-ILP32E-NEXT: sw t2, 84(a5) +; RV32I-ILP32E-NEXT: sw t3, 80(a5) +; RV32I-ILP32E-NEXT: sw ra, 76(a5) +; RV32I-ILP32E-NEXT: sw s11, 72(a5) +; RV32I-ILP32E-NEXT: sw s10, 68(a5) +; RV32I-ILP32E-NEXT: sw s9, 64(a5) +; RV32I-ILP32E-NEXT: sw s8, 60(a5) +; RV32I-ILP32E-NEXT: sw s7, 56(a5) +; RV32I-ILP32E-NEXT: sw s6, 52(a5) +; RV32I-ILP32E-NEXT: sw s5, 48(a5) +; RV32I-ILP32E-NEXT: sw s4, 44(a5) +; RV32I-ILP32E-NEXT: sw s3, 40(a5) +; RV32I-ILP32E-NEXT: sw s2, 36(a5) +; RV32I-ILP32E-NEXT: sw t6, 32(a5) +; RV32I-ILP32E-NEXT: sw t5, 28(a5) +; RV32I-ILP32E-NEXT: sw t4, 24(a5) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(a5) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(a5) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7) +; 
RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 36 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -80 @@ -623,6 +715,148 @@ ; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -136 +; RV32I-ILP32E-NEXT: sw ra, 132(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 128(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+4)(a0) +; RV32I-ILP32E-NEXT: sw a1, 116(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+8)(a0) +; RV32I-ILP32E-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+12)(a0) +; RV32I-ILP32E-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s1, a0, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(s1) +; RV32I-ILP32E-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(s1) +; RV32I-ILP32E-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 24(s1) +; RV32I-ILP32E-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 28(s1) +; RV32I-ILP32E-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 32(s1) +; RV32I-ILP32E-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 36(s1) +; RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 40(s1) +; 
RV32I-ILP32E-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 44(s1) +; RV32I-ILP32E-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 48(s1) +; RV32I-ILP32E-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 52(s1) +; RV32I-ILP32E-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 56(s1) +; RV32I-ILP32E-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 60(s1) +; RV32I-ILP32E-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 64(s1) +; RV32I-ILP32E-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 68(s1) +; RV32I-ILP32E-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 72(s1) +; RV32I-ILP32E-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 76(s1) +; RV32I-ILP32E-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 80(s1) +; RV32I-ILP32E-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 84(s1) +; RV32I-ILP32E-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 88(s1) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 92(s1) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 96(s1) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 100(s1) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 104(s1) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 108(s1) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 112(s1) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 116(s1) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 120(s1) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw s0, 124(s1) +; RV32I-ILP32E-NEXT: 
call callee@plt +; RV32I-ILP32E-NEXT: sw s0, 124(s1) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 120(s1) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 116(s1) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 112(s1) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 108(s1) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 104(s1) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 100(s1) +; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 96(s1) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 92(s1) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 88(s1) +; RV32I-ILP32E-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 84(s1) +; RV32I-ILP32E-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 80(s1) +; RV32I-ILP32E-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 76(s1) +; RV32I-ILP32E-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 72(s1) +; RV32I-ILP32E-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 68(s1) +; RV32I-ILP32E-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 64(s1) +; RV32I-ILP32E-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 60(s1) +; RV32I-ILP32E-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 56(s1) +; RV32I-ILP32E-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 52(s1) +; RV32I-ILP32E-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 48(s1) +; RV32I-ILP32E-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 44(s1) +; RV32I-ILP32E-NEXT: lw a0, 80(sp) # 
4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 40(s1) +; RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 36(s1) +; RV32I-ILP32E-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 32(s1) +; RV32I-ILP32E-NEXT: lw a0, 92(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 28(s1) +; RV32I-ILP32E-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 24(s1) +; RV32I-ILP32E-NEXT: lw a0, 100(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(s1) +; RV32I-ILP32E-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(s1) +; RV32I-ILP32E-NEXT: lui a1, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, 108(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a1) +; RV32I-ILP32E-NEXT: lw a0, 112(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a1) +; RV32I-ILP32E-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a1) +; RV32I-ILP32E-NEXT: lw a0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a1) +; RV32I-ILP32E-NEXT: lw s1, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 128(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 132(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 136 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -144 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e-double-bug.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+f -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-F %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+f,+d -verify-machineinstrs < %s \ +; RUN: | FileCheck 
-check-prefix=ILP32E-FD %s + +; ILP32E has an issue when spilling 64-bit fp registers to the stack. +; +; RISC-V compilers cannot assume that unaligned accesses will be implemented +; (even via emulation), so fp64 registers have to be 64-bit aligned on the +; stack. +; +; The problem is that ILP32E has 32-bit stack alignment (unlike every other +; calling convention, which has 128-bit stack alignment). +; +; If we need to realign the stack, we need the `s0` register to do so. +; +; This caused a phase-ordering issue in ILP32E, where we thought the frame +; pointer was not needed, so `s0` could be allocated by the register allocator, +; and then we might find that we needed to spill an fp64 register but were +; unable to do so because we didn't have a frame pointer. +; +; The solution is to always reserve a frame pointer if we're on ILP32E and have +; the D extension (fp64 registers). +; +; This issue could arise again for other extensions with registers that require +; an alignment larger than the calling convention stack alignment. 
+ +declare void @callee() + +@global_double = external global double, align 8 + +define void @test_double() { +; ILP32E-F-LABEL: test_double: +; ILP32E-F: # %bb.0: +; ILP32E-F-NEXT: addi sp, sp, -24 +; ILP32E-F-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-F-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: sw s1, 12(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: .cfi_offset ra, -4 +; ILP32E-F-NEXT: .cfi_offset s0, -8 +; ILP32E-F-NEXT: .cfi_offset s1, -12 +; ILP32E-F-NEXT: addi s0, sp, 24 +; ILP32E-F-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-F-NEXT: andi sp, sp, -8 +; ILP32E-F-NEXT: lui s1, %hi(global_double) +; ILP32E-F-NEXT: lw a0, %lo(global_double+4)(s1) +; ILP32E-F-NEXT: lw a1, %lo(global_double)(s1) +; ILP32E-F-NEXT: sw a0, 4(sp) +; ILP32E-F-NEXT: sw a1, 0(sp) +; ILP32E-F-NEXT: call callee@plt +; ILP32E-F-NEXT: lw a0, 4(sp) +; ILP32E-F-NEXT: lw a1, 0(sp) +; ILP32E-F-NEXT: sw a0, %lo(global_double+4)(s1) +; ILP32E-F-NEXT: sw a1, %lo(global_double)(s1) +; ILP32E-F-NEXT: addi sp, s0, -24 +; ILP32E-F-NEXT: lw s1, 12(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: addi sp, sp, 24 +; ILP32E-F-NEXT: ret +; +; ILP32E-FD-LABEL: test_double: +; ILP32E-FD: # %bb.0: +; ILP32E-FD-NEXT: addi sp, sp, -24 +; ILP32E-FD-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FD-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: sw s1, 12(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: .cfi_offset ra, -4 +; ILP32E-FD-NEXT: .cfi_offset s0, -8 +; ILP32E-FD-NEXT: .cfi_offset s1, -12 +; ILP32E-FD-NEXT: addi s0, sp, 24 +; ILP32E-FD-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FD-NEXT: andi sp, sp, -8 +; ILP32E-FD-NEXT: lui s1, %hi(global_double) +; ILP32E-FD-NEXT: fld ft0, %lo(global_double)(s1) +; ILP32E-FD-NEXT: fsd ft0, 0(sp) +; ILP32E-FD-NEXT: call callee@plt +; ILP32E-FD-NEXT: fld ft0, 0(sp) 
+; ILP32E-FD-NEXT: fsd ft0, %lo(global_double)(s1) +; ILP32E-FD-NEXT: addi sp, s0, -24 +; ILP32E-FD-NEXT: lw s1, 12(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: addi sp, sp, 24 +; ILP32E-FD-NEXT: ret + %local_double = alloca double, align 8 + %1 = load double, double* @global_double, align 8 + store double %1, double* %local_double, align 8 + call void @callee() + %2 = load double, double* %local_double, align 8 + store double %2, double* @global_double, align 8 + ret void +} + +@global_float = external global float, align 4 + +define void @test_float() { +; ILP32E-F-LABEL: test_float: +; ILP32E-F: # %bb.0: +; ILP32E-F-NEXT: addi sp, sp, -16 +; ILP32E-F-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-F-NEXT: .cfi_offset ra, -4 +; ILP32E-F-NEXT: .cfi_offset s0, -8 +; ILP32E-F-NEXT: .cfi_offset s1, -12 +; ILP32E-F-NEXT: addi s0, sp, 16 +; ILP32E-F-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-F-NEXT: andi sp, sp, -8 +; ILP32E-F-NEXT: lui s1, %hi(global_float) +; ILP32E-F-NEXT: flw ft0, %lo(global_float)(s1) +; ILP32E-F-NEXT: fsw ft0, 0(sp) +; ILP32E-F-NEXT: call callee@plt +; ILP32E-F-NEXT: flw ft0, 0(sp) +; ILP32E-F-NEXT: fsw ft0, %lo(global_float)(s1) +; ILP32E-F-NEXT: addi sp, s0, -16 +; ILP32E-F-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-F-NEXT: addi sp, sp, 16 +; ILP32E-F-NEXT: ret +; +; ILP32E-FD-LABEL: test_float: +; ILP32E-FD: # %bb.0: +; ILP32E-FD-NEXT: addi sp, sp, -16 +; ILP32E-FD-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-FD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-FD-NEXT: 
.cfi_offset ra, -4 +; ILP32E-FD-NEXT: .cfi_offset s0, -8 +; ILP32E-FD-NEXT: .cfi_offset s1, -12 +; ILP32E-FD-NEXT: addi s0, sp, 16 +; ILP32E-FD-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FD-NEXT: andi sp, sp, -8 +; ILP32E-FD-NEXT: lui s1, %hi(global_float) +; ILP32E-FD-NEXT: flw ft0, %lo(global_float)(s1) +; ILP32E-FD-NEXT: fsw ft0, 0(sp) +; ILP32E-FD-NEXT: call callee@plt +; ILP32E-FD-NEXT: flw ft0, 0(sp) +; ILP32E-FD-NEXT: fsw ft0, %lo(global_float)(s1) +; ILP32E-FD-NEXT: addi sp, s0, -16 +; ILP32E-FD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FD-NEXT: addi sp, sp, 16 +; ILP32E-FD-NEXT: ret + %local_float = alloca float, align 8 + %1 = load float, float* @global_float, align 8 + store float %1, float* %local_float, align 8 + call void @callee() + %2 = load float, float* %local_float, align 8 + store float %2, float* @global_float, align 8 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,2146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+d \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32D-ILP32E %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Any tests that would have identical output for some combination of the ilp32* +; ABIs belong in calling-conv-*-common.ll. 
This file contains tests that will +; have different output across those ABIs. i.e. where some arguments would be +; passed according to the floating point ABI, or where the stack is aligned to +; a different boundary. + +define i32 @callee_float_in_regs(i32 %a, float %b) { +; ILP32E-FPELIM-LABEL: callee_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: call __fixsfsi@plt +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: call __fixsfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_float_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: fmv.w.x 
ft0, a1 +; RV32D-ILP32E-NEXT: fcvt.w.s a1, ft0, rtz +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: ret + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() { +; ILP32E-FPELIM-LABEL: caller_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: lui a1, 262144 +; ILP32E-FPELIM-NEXT: call callee_float_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: lui a1, 262144 +; ILP32E-WITHFP-NEXT: call callee_float_in_regs@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_float_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: lui a1, 262144 +; RV32D-ILP32E-NEXT: call callee_float_in_regs@plt +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; 
RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) { +; ILP32E-FPELIM-LABEL: callee_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 8(s0) +; ILP32E-FPELIM-NEXT: lw a1, 0(s0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_float_on_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) # 4-byte 
Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a0, 0(s0) +; RV32D-ILP32E-NEXT: lw a1, 8(s0) +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -8 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i32 @caller_float_on_stack() { +; ILP32E-FPELIM-LABEL: caller_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lui a0, 264704 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: addi a1, zero, 4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a2, zero, 2 +; ILP32E-FPELIM-NEXT: addi a4, zero, 3 +; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: mv a3, zero +; ILP32E-FPELIM-NEXT: mv a5, zero +; ILP32E-FPELIM-NEXT: call callee_float_on_stack@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; 
ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lui a0, 264704 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: addi a1, zero, 4 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a2, zero, 2 +; ILP32E-WITHFP-NEXT: addi a4, zero, 3 +; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: mv a1, zero +; ILP32E-WITHFP-NEXT: mv a3, zero +; ILP32E-WITHFP-NEXT: mv a5, zero +; ILP32E-WITHFP-NEXT: call callee_float_on_stack@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_float_on_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 24 +; RV32D-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lui a0, 264704 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: sw zero, 4(sp) +; RV32D-ILP32E-NEXT: addi a1, zero, 4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a2, zero, 2 +; RV32D-ILP32E-NEXT: addi a4, zero, 3 +; RV32D-ILP32E-NEXT: sw a1, 0(sp) +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: mv a3, zero +; RV32D-ILP32E-NEXT: mv a5, zero +; RV32D-ILP32E-NEXT: 
call callee_float_on_stack@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -24 +; RV32D-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 24 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 260096 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 260096 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_tiny_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lui a0, 260096 +; RV32D-ILP32E-NEXT: ret + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; 
ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_tiny_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: call callee_tiny_scalar_ret@plt +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; RV32D-ILP32E-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_double_in_regs(i32 %a, double %b) { +; ILP32E-FPELIM-LABEL: callee_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: mv a1, a2 +; ILP32E-FPELIM-NEXT: call __fixdfsi@plt +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: mv a1, a2 +; ILP32E-WITHFP-NEXT: call __fixdfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_double_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -16 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 16 +; RV32D-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 8(sp) # 
4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 16 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: sw a1, 0(sp) +; RV32D-ILP32E-NEXT: sw a2, 4(sp) +; RV32D-ILP32E-NEXT: fld ft0, 0(sp) +; RV32D-ILP32E-NEXT: fcvt.w.d a1, ft0, rtz +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -16 +; RV32D-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 16 +; RV32D-ILP32E-NEXT: ret + %b_fptosi = fptosi double %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_regs() { +; ILP32E-FPELIM-LABEL: caller_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: lui a2, 262144 +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: call callee_double_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: mv a1, zero +; ILP32E-WITHFP-NEXT: call callee_double_in_regs@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 
4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_double_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: lui a2, 262144 +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: call callee_double_in_regs@plt +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_double_in_regs(i32 1, double 2.0) + ret i32 %1 +} + +; Check 2x*xlen values are aligned appropriately when passed on the stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: callee_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-NEXT: lw a1, 28(s0) +; ILP32E-FPELIM-NEXT: lw a2, 4(s0) +; ILP32E-FPELIM-NEXT: lw a3, 8(s0) +; ILP32E-FPELIM-NEXT: lw a4, 16(s0) +; ILP32E-FPELIM-NEXT: lw a5, 24(s0) +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a0, a0, a3 +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 
+; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-NEXT: lw a1, 28(s0) +; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a4, 16(s0) +; ILP32E-WITHFP-NEXT: lw a5, 24(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a0, a0, a3 +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_aligned_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a0, 0(a2) +; RV32D-ILP32E-NEXT: lw a1, 28(s0) +; RV32D-ILP32E-NEXT: lw a2, 4(s0) +; RV32D-ILP32E-NEXT: lw a3, 8(s0) +; RV32D-ILP32E-NEXT: lw a4, 16(s0) +; RV32D-ILP32E-NEXT: 
lw a5, 24(s0) +; RV32D-ILP32E-NEXT: add a0, a0, a2 +; RV32D-ILP32E-NEXT: add a0, a0, a3 +; RV32D-ILP32E-NEXT: add a0, a0, a4 +; RV32D-ILP32E-NEXT: add a0, a0, a5 +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -8 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 %3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: caller_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -80 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 80 +; ILP32E-FPELIM-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 80 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, zero, 18 +; ILP32E-FPELIM-NEXT: sw a0, 32(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 17 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 16 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 15 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 4 +; 
ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 60(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 56(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 52(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 48 +; ILP32E-FPELIM-NEXT: addi a3, zero, 12 +; ILP32E-FPELIM-NEXT: addi a4, zero, 13 +; ILP32E-FPELIM-NEXT: sw a6, 48(sp) +; ILP32E-FPELIM-NEXT: call callee_aligned_stack@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -80 +; ILP32E-FPELIM-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 80 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -80 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 80 +; ILP32E-WITHFP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 80 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, zero, 18 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 17 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 16 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw 
a0, 16(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 15 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 4 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 60(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 56(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 52(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 48 +; ILP32E-WITHFP-NEXT: addi a3, zero, 12 +; ILP32E-WITHFP-NEXT: addi a4, zero, 13 +; ILP32E-WITHFP-NEXT: sw a6, 48(sp) +; ILP32E-WITHFP-NEXT: call callee_aligned_stack@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -80 +; ILP32E-WITHFP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 80 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_aligned_stack: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -80 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 80 +; RV32D-ILP32E-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 80 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: lui a0, 262236 +; RV32D-ILP32E-NEXT: addi a0, a0, 655 +; RV32D-ILP32E-NEXT: sw a0, 20(sp) +; RV32D-ILP32E-NEXT: lui a0, 377487 +; RV32D-ILP32E-NEXT: addi a0, a0, 1475 +; RV32D-ILP32E-NEXT: sw a0, 16(sp) +; RV32D-ILP32E-NEXT: addi a0, 
zero, 18 +; RV32D-ILP32E-NEXT: sw a0, 32(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 17 +; RV32D-ILP32E-NEXT: sw a0, 28(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 16 +; RV32D-ILP32E-NEXT: sw a0, 24(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 15 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 14 +; RV32D-ILP32E-NEXT: sw a0, 4(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 4 +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: lui a0, 262153 +; RV32D-ILP32E-NEXT: addi a0, a0, 491 +; RV32D-ILP32E-NEXT: sw a0, 60(sp) +; RV32D-ILP32E-NEXT: lui a0, 545260 +; RV32D-ILP32E-NEXT: addi a0, a0, -1967 +; RV32D-ILP32E-NEXT: sw a0, 56(sp) +; RV32D-ILP32E-NEXT: lui a0, 964690 +; RV32D-ILP32E-NEXT: addi a0, a0, -328 +; RV32D-ILP32E-NEXT: sw a0, 52(sp) +; RV32D-ILP32E-NEXT: lui a0, 335544 +; RV32D-ILP32E-NEXT: addi a6, a0, 1311 +; RV32D-ILP32E-NEXT: lui a0, 688509 +; RV32D-ILP32E-NEXT: addi a5, a0, -2048 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 11 +; RV32D-ILP32E-NEXT: addi a2, sp, 48 +; RV32D-ILP32E-NEXT: addi a3, zero, 12 +; RV32D-ILP32E-NEXT: addi a4, zero, 13 +; RV32D-ILP32E-NEXT: sw a6, 48(sp) +; RV32D-ILP32E-NEXT: call callee_aligned_stack@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -80 +; RV32D-ILP32E-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 80 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +define double @callee_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 261888 +; ILP32E-FPELIM-NEXT: mv a0, zero +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; 
ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 261888 +; ILP32E-WITHFP-NEXT: mv a0, zero +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lui a1, 261888 +; RV32D-ILP32E-NEXT: mv a0, zero +; RV32D-ILP32E-NEXT: ret + ret double 1.0 +} + +define i64 @caller_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_scalar_ret: +; 
RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 24 +; RV32D-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: call callee_small_scalar_ret@plt +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: sw a1, 4(sp) +; RV32D-ILP32E-NEXT: fld ft0, 0(sp) +; RV32D-ILP32E-NEXT: fsd ft0, 8(sp) +; RV32D-ILP32E-NEXT: lw a0, 8(sp) +; RV32D-ILP32E-NEXT: lw a1, 12(sp) +; RV32D-ILP32E-NEXT: addi sp, s0, -24 +; RV32D-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 24 +; RV32D-ILP32E-NEXT: ret + %1 = call double @callee_small_scalar_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +; Check that on RV32, i64 is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_i64_in_regs(i32 %a, i64 %b) { +; ILP32E-FPELIM-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_i64_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: ret + %b_trunc = trunc i64 %b to i32 + %1 = add i32 %a, %b_trunc + ret i32 %1 +} + +define i32 @caller_i64_in_regs() { +; ILP32E-FPELIM-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: mv a2, zero +; ILP32E-FPELIM-NEXT: call callee_i64_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 
+; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call callee_i64_in_regs@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_i64_in_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: mv a2, zero +; RV32D-ILP32E-NEXT: call callee_i64_in_regs@plt +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_i64_in_regs(i32 1, i64 2) + ret i32 %1 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) { +; ILP32E-FPELIM-LABEL: callee_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: lw a6, 16(s0) +; ILP32E-FPELIM-NEXT: lw a7, 0(s0) +; ILP32E-FPELIM-NEXT: lw t0, 8(s0) +; ILP32E-FPELIM-NEXT: lw t1, 12(s0) +; ILP32E-FPELIM-NEXT: andi t2, a0, 255 +; ILP32E-FPELIM-NEXT: lui a0, 16 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1 +; ILP32E-FPELIM-NEXT: and a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, t2, 
a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-NEXT: addi sp, s0, -8 +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: lw a6, 16(s0) +; ILP32E-WITHFP-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-NEXT: lw t0, 8(s0) +; ILP32E-WITHFP-NEXT: lw t1, 12(s0) +; ILP32E-WITHFP-NEXT: andi t2, a0, 255 +; ILP32E-WITHFP-NEXT: lui a0, 16 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1 +; ILP32E-WITHFP-NEXT: and a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, t2, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-NEXT: addi sp, s0, -8 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_many_scalars: +; RV32D-ILP32E: 
# %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32D-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 8 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: lw a6, 16(s0) +; RV32D-ILP32E-NEXT: lw a7, 0(s0) +; RV32D-ILP32E-NEXT: lw t0, 8(s0) +; RV32D-ILP32E-NEXT: lw t1, 12(s0) +; RV32D-ILP32E-NEXT: andi t2, a0, 255 +; RV32D-ILP32E-NEXT: lui a0, 16 +; RV32D-ILP32E-NEXT: addi a0, a0, -1 +; RV32D-ILP32E-NEXT: and a0, a1, a0 +; RV32D-ILP32E-NEXT: add a0, t2, a0 +; RV32D-ILP32E-NEXT: add a0, a0, a2 +; RV32D-ILP32E-NEXT: xor a1, a4, t1 +; RV32D-ILP32E-NEXT: xor a2, a3, t0 +; RV32D-ILP32E-NEXT: or a1, a2, a1 +; RV32D-ILP32E-NEXT: seqz a1, a1 +; RV32D-ILP32E-NEXT: add a0, a1, a0 +; RV32D-ILP32E-NEXT: add a0, a0, a5 +; RV32D-ILP32E-NEXT: add a0, a0, a7 +; RV32D-ILP32E-NEXT: add a0, a0, a6 +; RV32D-ILP32E-NEXT: addi sp, s0, -8 +; RV32D-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 8 +; RV32D-ILP32E-NEXT: ret + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret i32 %8 +} + +define i32 @caller_many_scalars() { +; ILP32E-FPELIM-LABEL: caller_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; 
ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: addi a0, zero, 8 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 7 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a4, zero, 6 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: addi a5, zero, 5 +; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: mv a4, zero +; ILP32E-FPELIM-NEXT: call callee_many_scalars@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 8 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 12(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 7 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a4, zero, 6 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: addi a5, zero, 5 +; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: mv a4, zero +; ILP32E-WITHFP-NEXT: call callee_many_scalars@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; 
ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_many_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: addi a0, zero, 8 +; RV32D-ILP32E-NEXT: sw a0, 16(sp) +; RV32D-ILP32E-NEXT: sw zero, 12(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 7 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a4, zero, 6 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: addi a5, zero, 5 +; RV32D-ILP32E-NEXT: sw a4, 0(sp) +; RV32D-ILP32E-NEXT: mv a4, zero +; RV32D-ILP32E-NEXT: call callee_many_scalars@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) { +; ILP32E-FPELIM-LABEL: callee_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 0(a1) +; ILP32E-FPELIM-NEXT: lw a7, 0(a0) +; ILP32E-FPELIM-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a2, 12(a0) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: 
xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a6, 0(a1) +; ILP32E-WITHFP-NEXT: lw a7, 0(a0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a2, 12(a0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lw a6, 0(a1) +; RV32D-ILP32E-NEXT: lw a7, 0(a0) +; RV32D-ILP32E-NEXT: lw a4, 4(a1) +; RV32D-ILP32E-NEXT: lw a5, 12(a1) +; RV32D-ILP32E-NEXT: lw a2, 12(a0) +; RV32D-ILP32E-NEXT: lw a3, 4(a0) +; RV32D-ILP32E-NEXT: lw a1, 8(a1) +; RV32D-ILP32E-NEXT: lw a0, 8(a0) +; RV32D-ILP32E-NEXT: xor a2, a2, a5 +; RV32D-ILP32E-NEXT: xor a3, a3, a4 +; RV32D-ILP32E-NEXT: or a2, a3, a2 +; RV32D-ILP32E-NEXT: xor a0, a0, a1 +; RV32D-ILP32E-NEXT: 
xor a1, a7, a6 +; RV32D-ILP32E-NEXT: or a0, a1, a0 +; RV32D-ILP32E-NEXT: or a0, a0, a2 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: ret + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() { +; ILP32E-FPELIM-LABEL: caller_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: addi a2, zero, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: mv a1, sp +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, 
-16 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-NEXT: addi a2, zero, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a1, sp +; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalars: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -48 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 48 +; RV32D-ILP32E-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 48 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: lui a0, 524272 +; RV32D-ILP32E-NEXT: sw a0, 12(sp) +; RV32D-ILP32E-NEXT: sw zero, 8(sp) +; RV32D-ILP32E-NEXT: sw zero, 4(sp) +; RV32D-ILP32E-NEXT: sw zero, 0(sp) +; RV32D-ILP32E-NEXT: sw zero, 36(sp) +; RV32D-ILP32E-NEXT: sw zero, 32(sp) +; RV32D-ILP32E-NEXT: sw zero, 28(sp) +; RV32D-ILP32E-NEXT: addi a2, zero, 1 +; RV32D-ILP32E-NEXT: addi a0, sp, 24 +; RV32D-ILP32E-NEXT: mv a1, sp +; RV32D-ILP32E-NEXT: sw a2, 24(sp) +; RV32D-ILP32E-NEXT: call callee_large_scalars@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -48 +; RV32D-ILP32E-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 48 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_large_scalars(i128 1, fp128 
0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in a register + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) { +; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a6, 0(a0) +; ILP32E-FPELIM-NEXT: lw a7, 0(a1) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a2, 12(a1) +; ILP32E-FPELIM-NEXT: lw a3, 4(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a2, a2, a5 +; ILP32E-FPELIM-NEXT: xor a3, a3, a4 +; ILP32E-FPELIM-NEXT: or a2, a3, a2 +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a1, a7, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a2 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a6, 0(a0) +; ILP32E-WITHFP-NEXT: lw a7, 0(a1) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a2, 12(a1) +; ILP32E-WITHFP-NEXT: lw a3, 4(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: lw a1,
8(a1) +; ILP32E-WITHFP-NEXT: xor a2, a2, a5 +; ILP32E-WITHFP-NEXT: xor a3, a3, a4 +; ILP32E-WITHFP-NEXT: or a2, a3, a2 +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a1, a7, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a2 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalars_exhausted_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lw a0, 12(sp) +; RV32D-ILP32E-NEXT: lw a1, 4(sp) +; RV32D-ILP32E-NEXT: lw a6, 0(a0) +; RV32D-ILP32E-NEXT: lw a7, 0(a1) +; RV32D-ILP32E-NEXT: lw a4, 4(a0) +; RV32D-ILP32E-NEXT: lw a5, 12(a0) +; RV32D-ILP32E-NEXT: lw a2, 12(a1) +; RV32D-ILP32E-NEXT: lw a3, 4(a1) +; RV32D-ILP32E-NEXT: lw a0, 8(a0) +; RV32D-ILP32E-NEXT: lw a1, 8(a1) +; RV32D-ILP32E-NEXT: xor a2, a2, a5 +; RV32D-ILP32E-NEXT: xor a3, a3, a4 +; RV32D-ILP32E-NEXT: or a2, a3, a2 +; RV32D-ILP32E-NEXT: xor a0, a1, a0 +; RV32D-ILP32E-NEXT: xor a1, a7, a6 +; RV32D-ILP32E-NEXT: or a0, a1, a0 +; RV32D-ILP32E-NEXT: or a0, a0, a2 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: ret + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() { +; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; 
ILP32E-FPELIM-NEXT: addi a0, zero, 9 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 40 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, zero, 7 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: addi a6, zero, 8 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: addi a4, zero, 5 +; ILP32E-FPELIM-NEXT: addi a5, zero, 6 +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 9 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 40 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 7 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; 
ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: addi a6, zero, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: addi a4, zero, 5 +; ILP32E-WITHFP-NEXT: addi a5, zero, 6 +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalars_exhausted_regs: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -64 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32D-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 64 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: addi a0, sp, 16 +; RV32D-ILP32E-NEXT: sw a0, 12(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 9 +; RV32D-ILP32E-NEXT: sw a0, 8(sp) +; RV32D-ILP32E-NEXT: addi a0, sp, 40 +; RV32D-ILP32E-NEXT: sw a0, 4(sp) +; RV32D-ILP32E-NEXT: addi a0, zero, 7 +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: lui a0, 524272 +; RV32D-ILP32E-NEXT: sw a0, 28(sp) +; RV32D-ILP32E-NEXT: sw zero, 24(sp) +; RV32D-ILP32E-NEXT: sw zero, 20(sp) +; RV32D-ILP32E-NEXT: sw zero, 16(sp) +; RV32D-ILP32E-NEXT: sw zero, 52(sp) +; RV32D-ILP32E-NEXT: sw zero, 48(sp) +; RV32D-ILP32E-NEXT: sw zero, 44(sp) +; RV32D-ILP32E-NEXT: addi a6, 
zero, 8 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: addi a4, zero, 5 +; RV32D-ILP32E-NEXT: addi a5, zero, 6 +; RV32D-ILP32E-NEXT: sw a6, 40(sp) +; RV32D-ILP32E-NEXT: call callee_large_scalars_exhausted_regs@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -64 +; RV32D-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 64 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) { +; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a2, a1 +; ILP32E-FPELIM-NEXT: mv a1, a0 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call __floatditf@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded 
Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a2, a1 +; ILP32E-WITHFP-NEXT: mv a1, a0 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call __floatditf@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_mixed_scalar_libcalls: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 24 +; RV32D-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: mv a2, a1 +; RV32D-ILP32E-NEXT: mv a1, a0 +; RV32D-ILP32E-NEXT: mv a0, sp +; RV32D-ILP32E-NEXT: call __floatditf@plt +; RV32D-ILP32E-NEXT: lw a0, 0(sp) +; RV32D-ILP32E-NEXT: addi sp, s0, -24 +; RV32D-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 24 +; RV32D-ILP32E-NEXT: ret + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) { +; ILP32E-FPELIM-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; 
ILP32E-WITHFP-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_coerced_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: xor a0, a0, a1 +; RV32D-ILP32E-NEXT: seqz a0, a0 +; RV32D-ILP32E-NEXT: ret + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] %a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() { +; ILP32E-FPELIM-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi 
s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_coerced_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: call callee_small_coerced_struct@plt +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; RV32D-ILP32E-NEXT: ret + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a) { +; ILP32E-FPELIM-LABEL: callee_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; 
ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lw a1, 0(a0) +; RV32D-ILP32E-NEXT: lw a0, 12(a0) +; RV32D-ILP32E-NEXT: add a0, a1, a0 +; RV32D-ILP32E-NEXT: ret + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() { +; ILP32E-FPELIM-LABEL: caller_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -36 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: addi a2, zero, 3 +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: addi a3, zero, 4 +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 36 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-NEXT: sw ra, 36(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: 
addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-NEXT: addi a2, zero, 3 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: addi a3, zero, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-NEXT: sw a2, -32(s0) +; ILP32E-WITHFP-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-NEXT: call callee_large_struct@plt +; ILP32E-WITHFP-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_struct: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -36 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 36 +; RV32D-ILP32E-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: sw a0, 16(sp) +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: sw a1, 20(sp) +; RV32D-ILP32E-NEXT: addi a2, zero, 3 +; RV32D-ILP32E-NEXT: sw a2, 24(sp) +; RV32D-ILP32E-NEXT: addi a3, zero, 4 +; RV32D-ILP32E-NEXT: sw a3, 28(sp) +; RV32D-ILP32E-NEXT: sw a0, 0(sp) +; RV32D-ILP32E-NEXT: sw a1, 4(sp) +; RV32D-ILP32E-NEXT: sw a2, 8(sp) +; RV32D-ILP32E-NEXT: sw a3, 12(sp) +; RV32D-ILP32E-NEXT: mv a0, sp +; RV32D-ILP32E-NEXT: call callee_large_struct@plt +; RV32D-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 36 +; RV32D-ILP32E-NEXT: ret + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, 
i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %ls) + ret i32 %2 +} + +; Check return of 2x xlen structs + +define %struct.small @callee_small_struct_ret() { +; ILP32E-FPELIM-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi a0, zero, 1 +; ILP32E-FPELIM-NEXT: mv a1, zero +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: mv a1, zero +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_small_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi a0, zero, 1 +; RV32D-ILP32E-NEXT: mv a1, zero +; RV32D-ILP32E-NEXT: ret + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_struct_ret@plt +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: 
+; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_struct_ret@plt +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_small_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -4 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 4 +; RV32D-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: call callee_small_struct_ret@plt +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 4 +; RV32D-ILP32E-NEXT: ret + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen scalars + +define fp128 @callee_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 524272 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: 
.cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 524272 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: lui a1, 524272 +; RV32D-ILP32E-NEXT: sw a1, 12(a0) +; RV32D-ILP32E-NEXT: sw zero, 8(a0) +; RV32D-ILP32E-NEXT: sw zero, 4(a0) +; RV32D-ILP32E-NEXT: sw zero, 0(a0) +; RV32D-ILP32E-NEXT: ret + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_scalar_ret@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; 
ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_scalar_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -32 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32D-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 32 +; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -16 +; RV32D-ILP32E-NEXT: mv a0, sp +; RV32D-ILP32E-NEXT: call callee_large_scalar_ret@plt +; RV32D-ILP32E-NEXT: addi sp, s0, -32 +; RV32D-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 32 +; RV32D-ILP32E-NEXT: ret + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias sret(%struct.large) %agg.result) { +; ILP32E-FPELIM-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi a1, zero, 1 +; ILP32E-FPELIM-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 2 +; ILP32E-FPELIM-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 3 +; ILP32E-FPELIM-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-NEXT: addi a1, zero, 4 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; 
ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: addi a1, zero, 1 +; ILP32E-WITHFP-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 2 +; ILP32E-WITHFP-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 3 +; ILP32E-WITHFP-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-NEXT: addi a1, zero, 4 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: callee_large_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi a1, zero, 1 +; RV32D-ILP32E-NEXT: sw a1, 0(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 2 +; RV32D-ILP32E-NEXT: sw a1, 4(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 3 +; RV32D-ILP32E-NEXT: sw a1, 8(a0) +; RV32D-ILP32E-NEXT: addi a1, zero, 4 +; RV32D-ILP32E-NEXT: sw a1, 12(a0) +; RV32D-ILP32E-NEXT: ret + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill 
+; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct_ret@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_struct_ret@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; RV32D-ILP32E-LABEL: caller_large_struct_ret: +; RV32D-ILP32E: # %bb.0: +; RV32D-ILP32E-NEXT: addi sp, sp, -24 +; RV32D-ILP32E-NEXT: .cfi_def_cfa_offset 24 +; RV32D-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32D-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32D-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32D-ILP32E-NEXT: addi s0, sp, 24 
+; RV32D-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32D-ILP32E-NEXT: andi sp, sp, -8 +; RV32D-ILP32E-NEXT: mv a0, sp +; RV32D-ILP32E-NEXT: call callee_large_struct_ret@plt +; RV32D-ILP32E-NEXT: lw a0, 0(sp) +; RV32D-ILP32E-NEXT: lw a1, 12(sp) +; RV32D-ILP32E-NEXT: add a0, a0, a1 +; RV32D-ILP32E-NEXT: addi sp, s0, -24 +; RV32D-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32D-ILP32E-NEXT: addi sp, sp, 24 +; RV32D-ILP32E-NEXT: ret + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret(%struct.large) %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + %6 = add i32 %3, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32IF +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E ; Exercises the ILP32 calling convention code in the case that f32 is a legal ; type. 
As well as testing that lowering is correct, these tests also aim to @@ -12,6 +14,20 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: lw a0, 4(sp) ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 8 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: lw a0, 12(s0) +; RV32IF-ILP32E-NEXT: addi sp, s0, -8 +; RV32IF-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 8 +; RV32IF-ILP32E-NEXT: ret ret float %f } @@ -23,6 +39,23 @@ ; RV32IF-NEXT: fadd.s ft0, ft1, ft0 ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -8 +; RV32IF-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 8 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: flw ft0, 12(s0) +; RV32IF-ILP32E-NEXT: flw ft1, 8(s0) +; RV32IF-ILP32E-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-ILP32E-NEXT: fmv.x.w a0, ft0 +; RV32IF-ILP32E-NEXT: addi sp, s0, -8 +; RV32IF-ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 8 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %e, %f ret float %1 } @@ -47,6 +80,32 @@ ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -24 +; RV32IF-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 24 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; 
RV32IF-ILP32E-NEXT: sw a0, 12(sp) +; RV32IF-ILP32E-NEXT: lui a0, 264704 +; RV32IF-ILP32E-NEXT: sw a0, 8(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: addi a1, zero, 4 +; RV32IF-ILP32E-NEXT: addi a0, zero, 1 +; RV32IF-ILP32E-NEXT: addi a2, zero, 2 +; RV32IF-ILP32E-NEXT: addi a4, zero, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: mv a1, zero +; RV32IF-ILP32E-NEXT: mv a3, zero +; RV32IF-ILP32E-NEXT: mv a5, zero +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: addi sp, s0, -24 +; RV32IF-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 24 +; RV32IF-ILP32E-NEXT: ret %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a) ret float %1 } @@ -74,6 +133,35 @@ ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret +; +; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -24 +; RV32IF-ILP32E-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: addi s0, sp, 24 +; RV32IF-ILP32E-NEXT: andi sp, sp, -8 +; RV32IF-ILP32E-NEXT: fmv.w.x ft0, a1 +; RV32IF-ILP32E-NEXT: fmv.w.x ft1, a0 +; RV32IF-ILP32E-NEXT: fadd.s ft2, ft1, ft0 +; RV32IF-ILP32E-NEXT: fsub.s ft0, ft0, ft1 +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: addi a0, zero, 4 +; RV32IF-ILP32E-NEXT: sw a0, 0(sp) +; RV32IF-ILP32E-NEXT: fsw ft0, 12(sp) +; RV32IF-ILP32E-NEXT: addi a0, zero, 1 +; RV32IF-ILP32E-NEXT: addi a2, zero, 2 +; RV32IF-ILP32E-NEXT: addi a4, zero, 3 +; RV32IF-ILP32E-NEXT: fsw ft2, 8(sp) +; RV32IF-ILP32E-NEXT: mv a1, zero +; RV32IF-ILP32E-NEXT: mv a3, zero +; RV32IF-ILP32E-NEXT: mv a5, zero +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: addi sp, s0, -24 +; RV32IF-ILP32E-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; 
RV32IF-ILP32E-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 24 +; RV32IF-ILP32E-NEXT: ret %1 = fadd float %a, %b %2 = fsub float %b, %a %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2) diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I @@ -34,6 +36,33 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv s1, sp +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: mv a1, s1 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 
4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -1,11 +1,102 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: | FileCheck %s -check-prefixes=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I declare void @callee(i8*) +define void @caller16() { +; RV32I-LABEL: caller16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 16 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; 
RV32I-ILP32E-NEXT: addi sp, sp, 16 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign16() "no-realign-stack" { +; RV32I-LABEL: caller_no_realign16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller_no_realign16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller_no_realign16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + define void @caller32() { ; RV32I-LABEL: caller32: ; RV32I: # %bb.0: @@ -26,6 +117,25 @@ ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller32: +; RV32I-ILP32E: # %bb.0: 
+; RV32I-ILP32E-NEXT: addi sp, sp, -32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32I-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -32 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -32 +; RV32I-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 32 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -62,6 +172,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -98,6 +220,25 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, 
s0, -64 +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -134,6 +275,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -170,6 +323,25 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -128 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 @@ -206,6 +378,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: 
.cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -242,6 +426,25 @@ ; RV32I-NEXT: addi sp, sp, 256 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 256 +; RV32I-ILP32E-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -256 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -256 +; RV32I-ILP32E-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 256 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -256 @@ -278,6 +481,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -314,6 +529,25 @@ ; RV32I-NEXT: addi sp, sp, 1024 ; RV32I-NEXT: ret ; +; 
RV32I-ILP32E-LABEL: caller512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 1024 +; RV32I-ILP32E-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -512 +; RV32I-ILP32E-NEXT: addi a0, sp, 512 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -1024 +; RV32I-ILP32E-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 1024 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -1024 @@ -350,6 +584,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -388,6 +634,27 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 
+; RV32I-ILP32E-NEXT: andi sp, sp, -1024 +; RV32I-ILP32E-NEXT: addi a0, sp, 1024 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -426,6 +693,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -472,6 +751,35 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: andi sp, sp, -2048 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: mv a0, a0 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: 
lui a0, 1 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -518,6 +826,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -564,6 +884,35 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: srli a0, sp, 12 +; RV32I-ILP32E-NEXT: slli sp, a0, 12 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: mv a0, a0 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw s0, 
2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -610,6 +959,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll --- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll +++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \ @@ -33,8 +35,3 @@ ; CHECK-IMP-NEXT: ret ret void } - -; RUN: not --crash llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s - -; CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
+; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @abort() + +define i32 @caller(i32 %a) { +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -8 +; ILP32E-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -4 +; ILP32E-NEXT: .cfi_offset s0, -8 +; ILP32E-NEXT: mv s0, a0 +; ILP32E-NEXT: addi a0, zero, 1 +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: mv a2, zero +; ILP32E-NEXT: call va_double@plt +; ILP32E-NEXT: mv a0, s0 +; ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 8 +; ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call va_double@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; 
ILP32E-WITHFP-NEXT: ret +entry: + call void (i32, ...) @va_double(i32 1, double 2.000000e+00) + ret i32 %a +} + +define void @va_double(i32 %n, ...) { +; ILP32E-LABEL: va_double: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -32 +; ILP32E-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -28 +; ILP32E-NEXT: sw a5, 28(sp) +; ILP32E-NEXT: sw a4, 24(sp) +; ILP32E-NEXT: sw a3, 20(sp) +; ILP32E-NEXT: sw a2, 16(sp) +; ILP32E-NEXT: sw a1, 12(sp) +; ILP32E-NEXT: addi a0, sp, 19 +; ILP32E-NEXT: andi a1, a0, -8 +; ILP32E-NEXT: addi a0, a1, 8 +; ILP32E-NEXT: sw a0, 0(sp) +; ILP32E-NEXT: lw a0, 0(a1) +; ILP32E-NEXT: ori a1, a1, 4 +; ILP32E-NEXT: lw a1, 0(a1) +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: mv a2, zero +; ILP32E-NEXT: call __eqdf2@plt +; ILP32E-NEXT: bnez a0, .LBB1_2 +; ILP32E-NEXT: # %bb.1: # %if.end +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 32 +; ILP32E-NEXT: ret +; ILP32E-NEXT: .LBB1_2: # %if.then +; ILP32E-NEXT: call abort@plt +; +; ILP32E-WITHFP-LABEL: va_double: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: ori a1, a1, 4 +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; 
ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call __eqdf2@plt +; ILP32E-WITHFP-NEXT: bnez a0, .LBB1_2 +; ILP32E-WITHFP-NEXT: # %bb.1: # %if.end +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ILP32E-WITHFP-NEXT: .LBB1_2: # %if.then +; ILP32E-WITHFP-NEXT: call abort@plt +entry: + %args = alloca i8*, align 4 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %argp.cur = load i8*, i8** %args, align 4 + %0 = ptrtoint i8* %argp.cur to i32 + %1 = add i32 %0, 7 + %2 = and i32 %1, -8 + %argp.cur.aligned = inttoptr i32 %2 to i8* + %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8 + store i8* %argp.next, i8** %args, align 4 + %3 = bitcast i8* %argp.cur.aligned to double* + %4 = load double, double* %3, align 8 + %cmp = fcmp une double %4, 2.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + call void @abort() + unreachable + +if.end: + %args2 = bitcast i8** %args to i8* + call void @llvm.va_end(i8* %args2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -11,6 +11,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \ @@ -97,6 +101,44 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi 
sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -204,6 +246,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: 
sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 12 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -341,6 +416,62 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_va_arg_alloca: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: mv s1, a1 +; RV32I-ILP32E-NEXT: sw a5, 20(s0) +; RV32I-ILP32E-NEXT: sw a4, 16(s0) +; RV32I-ILP32E-NEXT: sw a3, 12(s0) +; RV32I-ILP32E-NEXT: sw a2, 8(s0) +; RV32I-ILP32E-NEXT: sw a1, 4(s0) +; RV32I-ILP32E-NEXT: addi a0, s0, 8 +; RV32I-ILP32E-NEXT: sw a0, -16(s0) +; RV32I-ILP32E-NEXT: addi a0, a1, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: call notdead@plt +; RV32I-ILP32E-NEXT: mv a0, s1 +; RV32I-ILP32E-NEXT: addi sp, s0, -16 
+; RV32I-ILP32E-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg_alloca: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a1, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: sub a0, sp, a0 +; ILP32E-WITHFP-NEXT: mv sp, a0 +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -461,6 +592,33 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va1_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: addi a4, zero, 2 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: call va1@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; 
RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: addi a4, zero, 2 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call va1@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -560,6 +718,47 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a1, a0, -8 +; RV32I-ILP32E-NEXT: addi a0, sp, 23 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a1) +; RV32I-ILP32E-NEXT: ori a1, a1, 4 +; RV32I-ILP32E-NEXT: lw a1, 0(a1) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: 
andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, s0, 19 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: ori a1, a1, 4 +; ILP32E-WITHFP-NEXT: lw a1, 0(a1) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -702,6 +901,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a1, a0, -8 +; RV32I-ILP32E-NEXT: ori a2, a1, 4 +; RV32I-ILP32E-NEXT: sw a2, 0(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a1) +; RV32I-ILP32E-NEXT: addi a1, a1, 8 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a2) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: ori a2, a1, 4 +; ILP32E-WITHFP-NEXT: sw a2, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: addi a1, a1, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; 
ILP32E-WITHFP-NEXT: lw a1, 0(a2) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -784,6 +1026,31 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va2_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a3, 261888 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: call va2@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a3, 261888 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: call va2@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -887,6 +1154,51 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: addi a3, sp, 23 +; 
RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a0) +; RV32I-ILP32E-NEXT: ori a0, a0, 4 +; RV32I-ILP32E-NEXT: lw a4, 0(a0) +; RV32I-ILP32E-NEXT: add a0, a1, a3 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a4 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, s0, 19 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: ori a0, a0, 4 +; ILP32E-WITHFP-NEXT: lw a4, 0(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a4 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1036,6 +1348,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_va_arg: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -20 +; RV32I-ILP32E-NEXT: sw a5, 16(sp) +; RV32I-ILP32E-NEXT: sw a4, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 15 +; RV32I-ILP32E-NEXT: andi a0, a0, -8 +; RV32I-ILP32E-NEXT: ori a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 0(sp) +; RV32I-ILP32E-NEXT: lw a4, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 8 +; 
RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lw a3, 0(a3) +; RV32I-ILP32E-NEXT: add a0, a1, a4 +; RV32I-ILP32E-NEXT: sltu a1, a0, a1 +; RV32I-ILP32E-NEXT: add a2, a2, a3 +; RV32I-ILP32E-NEXT: add a1, a2, a1 +; RV32I-ILP32E-NEXT: addi sp, sp, 20 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -28 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: ori a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a4, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a3) +; ILP32E-WITHFP-NEXT: add a0, a1, a4 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 28 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1126,6 +1485,37 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va3_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi a0, zero, 2 +; RV32I-ILP32E-NEXT: addi a1, zero, 1111 +; RV32I-ILP32E-NEXT: lui a5, 262144 +; RV32I-ILP32E-NEXT: mv a2, zero +; RV32I-ILP32E-NEXT: mv a4, zero +; RV32I-ILP32E-NEXT: call va3@plt +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; 
RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: addi a0, zero, 2 +; ILP32E-WITHFP-NEXT: addi a1, zero, 1111 +; ILP32E-WITHFP-NEXT: lui a5, 262144 +; ILP32E-WITHFP-NEXT: mv a2, zero +; ILP32E-WITHFP-NEXT: mv a4, zero +; ILP32E-WITHFP-NEXT: call va3@plt +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -1287,6 +1677,87 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va4_va_copy: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -40 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: mv s0, a1 +; RV32I-ILP32E-NEXT: sw a5, 36(sp) +; RV32I-ILP32E-NEXT: sw a4, 32(sp) +; RV32I-ILP32E-NEXT: sw a3, 28(sp) +; RV32I-ILP32E-NEXT: sw a2, 24(sp) +; RV32I-ILP32E-NEXT: sw a1, 20(sp) +; RV32I-ILP32E-NEXT: addi a0, sp, 24 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: call notdead@plt +; RV32I-ILP32E-NEXT: lw a0, 4(sp) +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a1, a0, 4 +; RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lw a1, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a2, a0, 4 +; RV32I-ILP32E-NEXT: sw a2, 4(sp) +; RV32I-ILP32E-NEXT: lw a2, 0(a0) +; RV32I-ILP32E-NEXT: addi a0, a0, 7 
+; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: addi a3, a0, 4 +; RV32I-ILP32E-NEXT: sw a3, 4(sp) +; RV32I-ILP32E-NEXT: lw a0, 0(a0) +; RV32I-ILP32E-NEXT: add a1, a1, s0 +; RV32I-ILP32E-NEXT: add a1, a1, a2 +; RV32I-ILP32E-NEXT: add a0, a1, a0 +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 40 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va4_va_copy: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -44 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: sw a0, -20(s0) +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: lw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -16(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a2, a0, 4 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -16(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a1, a1, s1 +; ILP32E-WITHFP-NEXT: add a1, a1, a2 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 
16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 44 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -1530,6 +2001,106 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va5_aligned_stack_caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -80 +; RV32I-ILP32E-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s0, sp, 80 +; RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: addi a0, zero, 17 +; RV32I-ILP32E-NEXT: sw a0, 32(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 16 +; RV32I-ILP32E-NEXT: sw a0, 28(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 15 +; RV32I-ILP32E-NEXT: sw a0, 24(sp) +; RV32I-ILP32E-NEXT: lui a0, 262236 +; RV32I-ILP32E-NEXT: addi a0, a0, 655 +; RV32I-ILP32E-NEXT: sw a0, 20(sp) +; RV32I-ILP32E-NEXT: lui a0, 377487 +; RV32I-ILP32E-NEXT: addi a0, a0, 1475 +; RV32I-ILP32E-NEXT: sw a0, 16(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 14 +; RV32I-ILP32E-NEXT: sw a0, 8(sp) +; RV32I-ILP32E-NEXT: addi a0, zero, 4 +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: lui a0, 688509 +; RV32I-ILP32E-NEXT: addi a0, a0, -2048 +; RV32I-ILP32E-NEXT: sw a0, 0(sp) +; RV32I-ILP32E-NEXT: lui a0, 262153 +; RV32I-ILP32E-NEXT: addi a0, a0, 491 +; RV32I-ILP32E-NEXT: sw a0, 60(sp) +; RV32I-ILP32E-NEXT: lui a0, 545260 +; RV32I-ILP32E-NEXT: addi a0, a0, -1967 +; RV32I-ILP32E-NEXT: sw a0, 56(sp) +; RV32I-ILP32E-NEXT: lui a0, 964690 +; RV32I-ILP32E-NEXT: addi a0, a0, -328 +; RV32I-ILP32E-NEXT: sw a0, 52(sp) +; RV32I-ILP32E-NEXT: lui a0, 335544 +; RV32I-ILP32E-NEXT: addi a5, a0, 1311 +; RV32I-ILP32E-NEXT: addi a0, zero, 1 +; RV32I-ILP32E-NEXT: addi a1, zero, 11 +; RV32I-ILP32E-NEXT: addi a2, sp, 48 +; RV32I-ILP32E-NEXT: addi a3, zero, 12 +; RV32I-ILP32E-NEXT: addi a4, zero, 13 +; 
RV32I-ILP32E-NEXT: sw a5, 48(sp) +; RV32I-ILP32E-NEXT: call va5_aligned_stack_callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -80 +; RV32I-ILP32E-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 80 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -80 +; ILP32E-WITHFP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 80 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, zero, 17 +; ILP32E-WITHFP-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 16 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 15 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 14 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, zero, 4 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a0, a0, -2048 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 60(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 56(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 52(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a5, a0, 1311 +; ILP32E-WITHFP-NEXT: addi a0, zero, 1 +; ILP32E-WITHFP-NEXT: addi a1, zero, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 48 +; ILP32E-WITHFP-NEXT: addi a3, zero, 12 +; ILP32E-WITHFP-NEXT: addi a4, zero, 13 +; 
ILP32E-WITHFP-NEXT: sw a5, 48(sp) +; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -80 +; ILP32E-WITHFP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 80 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 @@ -1694,6 +2265,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va6_no_fixed_args: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: addi a1, sp, 8 +; RV32I-ILP32E-NEXT: sw a1, 0(sp) +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va6_no_fixed_args: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -1865,6 +2469,75 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: 
add sp, sp, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; RV32I-ILP32E-LABEL: va_large_stack: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: lui a0, 24414 +; RV32I-ILP32E-NEXT: addi a0, a0, 288 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 100000032 +; RV32I-ILP32E-NEXT: mv a0, a1 +; RV32I-ILP32E-NEXT: lui a6, 24414 +; RV32I-ILP32E-NEXT: addi a6, a6, 284 +; RV32I-ILP32E-NEXT: add a6, sp, a6 +; RV32I-ILP32E-NEXT: sw a5, 0(a6) +; RV32I-ILP32E-NEXT: lui a5, 24414 +; RV32I-ILP32E-NEXT: addi a5, a5, 280 +; RV32I-ILP32E-NEXT: add a5, sp, a5 +; RV32I-ILP32E-NEXT: sw a4, 0(a5) +; RV32I-ILP32E-NEXT: lui a4, 24414 +; RV32I-ILP32E-NEXT: addi a4, a4, 276 +; RV32I-ILP32E-NEXT: add a4, sp, a4 +; RV32I-ILP32E-NEXT: sw a3, 0(a4) +; RV32I-ILP32E-NEXT: lui a3, 24414 +; RV32I-ILP32E-NEXT: addi a3, a3, 272 +; RV32I-ILP32E-NEXT: add a3, sp, a3 +; RV32I-ILP32E-NEXT: sw a2, 0(a3) +; RV32I-ILP32E-NEXT: lui a2, 24414 +; RV32I-ILP32E-NEXT: addi a2, a2, 268 +; RV32I-ILP32E-NEXT: add a2, sp, a2 +; RV32I-ILP32E-NEXT: sw a1, 0(a2) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 272 +; RV32I-ILP32E-NEXT: add a1, sp, a1 +; RV32I-ILP32E-NEXT: mv a1, a1 +; RV32I-ILP32E-NEXT: sw a1, 4(sp) +; RV32I-ILP32E-NEXT: lui a1, 24414 +; RV32I-ILP32E-NEXT: addi a1, a1, 288 +; RV32I-ILP32E-NEXT: add sp, sp, a1 +; RV32I-ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va_large_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -2044 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044 +; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 2020 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: lui a0, 24414 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1748 +; ILP32E-WITHFP-NEXT: sub sp, sp, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: 
sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: lui a2, 1024162 +; ILP32E-WITHFP-NEXT: addi a2, a2, -272 +; ILP32E-WITHFP-NEXT: add a2, s0, a2 +; ILP32E-WITHFP-NEXT: sw a1, 0(a2) +; ILP32E-WITHFP-NEXT: lui a1, 24414 +; ILP32E-WITHFP-NEXT: addi a1, a1, -1748 +; ILP32E-WITHFP-NEXT: add sp, sp, a1 +; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 2044 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414