diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1060,6 +1060,8 @@ ^^^^^^^^^^^^^^ - Unaligned memory accesses can be toggled by ``-m[no-]unaligned-access`` or the aliases ``-m[no-]strict-align``. +- CodeGen of RV32E/RV64E is now supported experimentally. +- CodeGen of the ilp32e/lp64e ABIs is now supported experimentally. - Default ABI with F but without D was changed to ilp32f for RV32 and to lp64f for RV64. diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -132,6 +132,12 @@ } bool setABI(const std::string &Name) override { + if (Name == "ilp32e") { + ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S32"); + return true; + } + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { ABI = Name; return true; @@ -156,6 +162,12 @@ } bool setABI(const std::string &Name) override { + if (Name == "lp64e") { + ABI = Name; + resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S64"); + return true; + } + if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") { ABI = Name; return true; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -154,7 +154,7 @@ else Builder.defineMacro("__riscv_float_abi_soft"); - if (ABIName == "ilp32e") + if (ABIName == "ilp32e" || ABIName == "lp64e") Builder.defineMacro("__riscv_abi_rve"); Builder.defineMacro("__riscv_arch_test"); @@ -214,6 +214,13 @@ Builder.defineMacro("__riscv_misaligned_fast"); else Builder.defineMacro("__riscv_misaligned_avoid"); + + if (ISAInfo->hasExtension("e")) { + if (Is64Bit) + Builder.defineMacro("__riscv_64e"); + else + Builder.defineMacro("__riscv_32e"); + } } static constexpr Builtin::Info BuiltinInfo[] = { @@ -378,6 +385,11 @@ if (llvm::is_contained(Features, "+experimental")) HasExperimental = true; 
+ if (ABI == "ilp32e" && ISAInfo->hasExtension("d")) { + Diags.Report(diag::err_invalid_feature_combination) + << "ILP32E cannot be used with the D ISA extension"; + return false; + } return true; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -229,7 +229,8 @@ ABIFLen = 32; else if (ABIStr.ends_with("d")) ABIFLen = 64; - return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen); + bool EABI = ABIStr.ends_with("e"); + return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen, EABI); } case llvm::Triple::systemz: { diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -496,7 +496,8 @@ bool SoftFloatABI); std::unique_ptr<TargetCodeGenInfo> -createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen); +createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen, + bool EABI); std::unique_ptr<TargetCodeGenInfo> createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM); diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -25,8 +25,9 @@ // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target // with soft float ABI has FLen==0). unsigned FLen; - static const int NumArgGPRs = 8; - static const int NumArgFPRs = 8; + const int NumArgGPRs; + const int NumArgFPRs; + const bool EABI; bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, @@ -34,8 +35,10 @@ CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, + bool EABI) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen), NumArgGPRs(EABI ? 
6 : 8), + NumArgFPRs(FLen != 0 ? 8 : 0), EABI(EABI) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. @@ -86,7 +89,7 @@ } int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? NumArgFPRs : 0; + int ArgFPRsLeft = NumArgFPRs; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; @@ -396,9 +399,12 @@ // Determine the number of GPRs needed to pass the current argument // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" // register pairs, so may consume 3 registers. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. int NeededArgGPRs = 1; if (!IsFixed && NeededAlign == 2 * XLen) - NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + NeededArgGPRs = 2 + (EABI && XLen == 32 ? 0 : (ArgGPRsLeft % 2)); else if (Size > XLen && Size <= 2 * XLen) NeededArgGPRs = 2; @@ -480,6 +486,13 @@ auto TInfo = getContext().getTypeInfoInChars(Ty); + // TODO: To be compatible with GCC's behaviors, we force arguments with + // 2×XLEN-bit alignment and size at most 2×XLEN bits like `long long`, + // `unsigned long long` and `double` to have 4-byte alignment. This + // behavior may be changed when RV32E/ILP32E is ratified. + if (EABI && XLen == 32) + TInfo.Align = std::min(TInfo.Align, CharUnits::fromQuantity(4)); + // Arguments bigger than 2*Xlen bytes are passed indirectly. 
bool IsIndirect = TInfo.Width > 2 * SlotSize; @@ -499,8 +512,9 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, - unsigned FLen) - : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} + unsigned FLen, bool EABI) + : TargetCodeGenInfo( + std::make_unique<RISCVABIInfo>(CGT, XLen, FLen, EABI)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -526,6 +540,7 @@ std::unique_ptr<TargetCodeGenInfo> CodeGen::createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, - unsigned FLen) { - return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen); + unsigned FLen, bool EABI) { + return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen, + EABI); } diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -210,6 +210,7 @@ // rv32e -> ilp32e // rv32* -> ilp32 // rv64g | rv64*d -> lp64d + // rv64e -> lp64e // rv64* -> lp64 StringRef Arch = getRISCVArch(Args, Triple); @@ -285,6 +286,7 @@ // 3. 
Choose a default based on `-mabi=` // // ilp32e -> rv32e + // lp64e -> rv64e // ilp32 | ilp32f | ilp32d -> rv32imafdc // lp64 | lp64f | lp64d -> rv64imafdc if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { @@ -292,6 +294,8 @@ if (MABI.equals_insensitive("ilp32e")) return "rv32e"; + else if (MABI.equals_insensitive("lp64e")) + return "rv64e"; else if (MABI.starts_with_insensitive("ilp32")) return "rv32imafdc"; else if (MABI.starts_with_insensitive("lp64")) { diff --git a/clang/test/CodeGen/RISCV/riscv32-abi.c b/clang/test/CodeGen/RISCV/riscv32-abi.c --- a/clang/test/CodeGen/RISCV/riscv32-abi.c +++ b/clang/test/CodeGen/RISCV/riscv32-abi.c @@ -5,6 +5,8 @@ // RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32-ILP32F,ILP32F %s // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \ // RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32D %s +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e %s -o - \ +// RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32-ILP32F,ILP32,ILP32E %s #include <stddef.h> #include <stdint.h> @@ -2064,4 +2066,5 @@ } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// ILP32E: {{.*}} // ILP32F: {{.*}} diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c @@ -0,0 +1,4 @@ +// RUN: not %clang_cc1 -triple riscv32 -target-feature +d -emit-llvm -target-abi ilp32e %s 2>&1 \ +// RUN: | FileCheck -check-prefix=ILP32E-WITH-FD %s + +// ILP32E-WITH-FD: error: invalid feature combination: ILP32E cannot be used with the D ISA extension diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c --- a/clang/test/CodeGen/RISCV/riscv32-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c @@ -1,9 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32F // RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-feature +f -target-abi ilp32d -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32D +// RUN: %clang_cc1 -triple riscv32 -target-abi ilp32e -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32E #include <stddef.h> #include <stdint.h> @@ -102,24 +104,60 @@ // used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the // correct offsets are used. -// CHECK-LABEL: define dso_local double @f_va_2 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca double, align 8 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 -// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 -// CHECK-NEXT: ret double [[TMP2]] +// CHECK-ILP32F-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: ret double [[TMP2]] +// +// CHECK-ILP32D-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: ret double [[TMP2]] +// +// CHECK-ILP32E-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: ret double [[TMP1]] // double f_va_2(char *fmt, ...) { __builtin_va_list va; @@ -133,40 +171,106 @@ // Two "aligned" register pairs. -// CHECK-LABEL: define dso_local double @f_va_3 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[X:%.*]] = alloca double, align 8 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 -// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 -// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 -// CHECK-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP4]], ptr [[X]], align 8 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 -// 
CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] -// CHECK-NEXT: ret double [[ADD]] +// CHECK-ILP32F-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32F-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 +// 
CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP4]], ptr [[X]], align 8 +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK-ILP32F-NEXT: ret double [[ADD]] +// +// CHECK-ILP32D-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// 
CHECK-ILP32D-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP4]], ptr [[X]], align 8 +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK-ILP32D-NEXT: ret double [[ADD]] +// +// CHECK-ILP32E-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32E-NEXT: store i32 [[TMP1]], ptr [[W]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP2]], ptr [[X]], align 8 +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]] +// CHECK-ILP32E-NEXT: ret double [[ADD]] // double f_va_3(char *fmt, ...) 
{ __builtin_va_list va; @@ -180,93 +284,269 @@ return v + x; } -// CHECK-LABEL: define dso_local i32 @f_va_4 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[LD:%.*]] = alloca fp128, align 16 -// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 -// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 -// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 -// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 -// CHECK-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 -// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) -// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 -// 
CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) -// CHECK-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 -// CHECK-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] -// CHECK-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 -// CHECK-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 -// CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 -// CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] -// CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 -// CHECK-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 -// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] -// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 -// CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 -// CHECK-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 -// CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] -// CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 -// CHECK-NEXT: [[TMP10:%.*]] = load i8, 
ptr [[D]], align 1 -// CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 -// CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] -// CHECK-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 -// CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 -// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] -// CHECK-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 -// CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 -// CHECK-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] -// CHECK-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 -// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] -// CHECK-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 -// CHECK-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] -// CHECK-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: ret i32 [[TMP20]] +// CHECK-ILP32F-LABEL: define dso_local i32 @f_va_4 
+// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32F-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32F-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32F-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32F-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32F-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 +// CHECK-ILP32F-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32F-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// 
CHECK-ILP32F-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32F-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32F-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32F-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32F-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32F-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32F-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32F-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32F-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32F-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32F-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32F-NEXT: [[TMP9:%.*]] = load i8, ptr 
[[C]], align 1 +// CHECK-ILP32F-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 +// CHECK-ILP32F-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32F-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32F-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32F-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32F-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32F-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32F-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32F-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32F-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32F-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32F-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32F-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32F-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32F-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32F-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32F-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32F-NEXT: [[ADD27:%.*]] = add nsw i32 
[[ADD25]], [[TMP18]] +// CHECK-ILP32F-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32F-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32F-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32F-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: ret i32 [[TMP20]] +// +// CHECK-ILP32D-LABEL: define dso_local i32 @f_va_4 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32D-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32D-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32D-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32D-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32D-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 +// 
CHECK-ILP32D-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32D-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32D-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32D-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32D-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32D-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32D-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32D-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32D-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32D-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32D-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32D-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32D-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 +// CHECK-ILP32D-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 +// CHECK-ILP32D-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32D-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32D-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32D-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32D-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32D-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32D-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32D-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32D-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32D-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32D-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32D-NEXT: [[ADD23:%.*]] 
= add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32D-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32D-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32D-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32D-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32D-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] +// CHECK-ILP32D-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32D-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32D-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32D-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: ret i32 [[TMP20]] +// +// CHECK-ILP32E-LABEL: define dso_local i32 @f_va_4 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32E-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32E-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32E-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32E-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 4 +// CHECK-ILP32E-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32E-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds 
i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32E-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32E-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32E-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32E-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32E-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32E-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32E-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32E-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32E-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32E-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32E-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32E-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 +// CHECK-ILP32E-NEXT: [[CONV14:%.*]] = zext i8 
[[TMP9]] to i32 +// CHECK-ILP32E-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32E-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32E-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32E-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32E-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32E-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32E-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32E-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32E-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32E-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32E-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32E-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32E-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32E-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32E-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32E-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32E-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] +// CHECK-ILP32E-NEXT: [[D28:%.*]] = 
getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32E-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32E-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32E-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: ret i32 [[TMP20]] // int f_va_4(char *fmt, ...) { __builtin_va_list va; diff --git a/clang/test/CodeGen/RISCV/riscv64-abi.c b/clang/test/CodeGen/RISCV/riscv64-abi.c --- a/clang/test/CodeGen/RISCV/riscv64-abi.c +++ b/clang/test/CodeGen/RISCV/riscv64-abi.c @@ -5,6 +5,8 @@ // RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64-LP64F,LP64F %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - \ // RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64D %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-abi lp64e %s -o - \ +// RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64-LP64F,LP64,LP64E %s #include #include @@ -2046,3 +2048,5 @@ return (union float16_u){1.0}; } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// LP64E: {{.*}} diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c --- a/clang/test/CodeGen/RISCV/riscv64-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c @@ -4,6 +4,8 @@ // RUN: | FileCheck %s // RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-feature +f -target-abi lp64d -emit-llvm %s -o - \ // RUN: | FileCheck %s +// RUN: %clang_cc1 -triple riscv64 -target-abi lp64e -emit-llvm %s -o - \ +// RUN: | FileCheck %s #include #include diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -3,6 +3,8 @@ // RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -x c -E -dM %s \ // RUN: -o - | FileCheck %s +// CHECK-NOT: __riscv_32e {{.*$}} +// CHECK-NOT: __riscv_64e {{.*$}} // CHECK-NOT: __riscv_a {{.*$}} // CHECK-NOT: __riscv_atomic // CHECK-NOT: __riscv_c {{.*$}} @@ -170,6 +172,17 @@ // CHECK-D-EXT: __riscv_flen 64 // CHECK-D-EXT: __riscv_fsqrt 1 +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV32E %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV64E %s +// CHECK-RV32E: __riscv_32e 1 +// CHECK-RV64E: __riscv_64e 1 +// CHECK-E-EXT: __riscv_abi_rve 1 +// CHECK-E-EXT: __riscv_e 2000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32if -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s @@ -211,6 +224,15 @@ // CHECK-DOUBLE-NOT: __riscv_float_abi_soft // CHECK-DOUBLE-NOT: __riscv_float_abi_single +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32i -mabi=ilp32e -x c -E -dM %s \ +// RUN: -o - | FileCheck 
--check-prefix=CHECK-ILP32E %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64i -mabi=lp64e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-LP64E %s +// CHECK-ILP32E: __riscv_abi_rve 1 +// CHECK-LP64E: __riscv_abi_rve 1 + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32ih -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-H-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -88,6 +88,7 @@ ``C`` Supported ``D`` Supported ``F`` Supported + ``E`` Supported (`See note <#riscv-rve-note>`__) ``H`` Assembly Support ``M`` Supported ``Smaia`` Supported @@ -179,6 +180,11 @@ Supported Fully supported by the compiler. This includes everything in Assembly Support, along with - if relevant - C language intrinsics for the instructions and pattern matching by the compiler to recognize idiomatic patterns which can be lowered to the associated instructions. +.. _riscv-rve-note: + +``E`` + Support for RV32E/RV64E and the ilp32e/lp64e ABIs is experimental. To be compatible with the implementation of ilp32e in GCC, we don't use aligned registers to pass variadic arguments. Furthermore, we set the stack alignment to 4 bytes for types with a length of 2*XLEN. + .. _riscv-scalar-crypto-note1: ``Zbkb``, ``Zbkx`` diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -155,6 +155,8 @@ needs to work with SiFive to define and document real extension names for individual CSRs and instructions. * ``-mcpu=sifive-p450`` was added. +* CodeGen of RV32E/RV64E is now supported experimentally. +* CodeGen of ilp32e/lp64e is now supported experimentally.
Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/Support/RISCVAttributes.h b/llvm/include/llvm/Support/RISCVAttributes.h --- a/llvm/include/llvm/Support/RISCVAttributes.h +++ b/llvm/include/llvm/Support/RISCVAttributes.h @@ -34,7 +34,7 @@ PRIV_SPEC_REVISION = 12, }; -enum StackAlign { ALIGN_4 = 4, ALIGN_16 = 16 }; +enum StackAlign { ALIGN_4 = 4, ALIGN_8 = 8, ALIGN_16 = 16 }; enum { NOT_ALLOWED = 0, ALLOWED = 1 }; diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -1278,20 +1278,20 @@ StringRef RISCVISAInfo::computeDefaultABI() const { if (XLen == 32) { + if (hasExtension("e")) + return "ilp32e"; if (hasExtension("d")) return "ilp32d"; if (hasExtension("f")) return "ilp32f"; - if (hasExtension("e")) - return "ilp32e"; return "ilp32"; } else if (XLen == 64) { + if (hasExtension("e")) + return "lp64e"; if (hasExtension("d")) return "lp64d"; if (hasExtension("f")) return "lp64f"; - if (hasExtension("e")) - return "lp64e"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -431,7 +431,7 @@ MachineFunction &MF = MIRBuilder.getMF(); const RISCVSubtarget &Subtarget = MF.getSubtarget(); unsigned XLenInBytes = Subtarget.getXLen() / 8; - ArrayRef ArgRegs = RISCV::getArgGPRs(); + ArrayRef ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -68,6 +68,11 @@ TargetABI = ABI_Unknown; } + if ((TargetABI == RISCVABI::ABI::ABI_ILP32E || + (TargetABI == ABI_Unknown && IsRVE && !IsRV64)) && + FeatureBits[RISCV::FeatureStdExtD]) + report_fatal_error("ILP32E cannot be used with the D ISA extension"); + if (TargetABI != ABI_Unknown) return TargetABI; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -50,11 +50,14 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign) { - if (STI.hasFeature(RISCV::FeatureRVE)) - report_fatal_error("Codegen not yet implemented for RVE"); - - if (EmitStackAlign) - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + if (EmitStackAlign) { + if (TargetABI == RISCVABI::ABI_ILP32E) + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4); + else if (TargetABI == RISCVABI::ABI_LP64E) + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_8); + else + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + } auto ParseResult = RISCVFeatures::parseFeatureBits( STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits()); diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -13,8 +13,10 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). 
+def CSR_ILP32E_LP64E : CalleeSavedRegs<(add X1, X8, X9)>; + def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add CSR_ILP32E_LP64E, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -38,3 +40,15 @@ // Same as CSR_Interrupt, but including all 64-bit FP registers. def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, (sequence "F%u_D", 0, 31))>; + +// Same as CSR_Interrupt, but excluding X16-X31. +def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt, + (sequence "X%u", 16, 31))>; + +// Same as CSR_XLEN_F32_Interrupt, but excluding X16-X31. +def CSR_XLEN_F32_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F32_Interrupt, + (sequence "X%u", 16, 31))>; + +// Same as CSR_XLEN_F64_Interrupt, but excluding X16-X31. +def CSR_XLEN_F64_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F64_Interrupt, + (sequence "X%u", 16, 31))>; diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -17,6 +17,13 @@ AssemblerPredicate<(all_of FeatureStdExtZicsr), "'Zicsr' (CSRs)">; +def FeatureStdExtI + : SubtargetFeature<"i", "HasStdExtI", "true", + "'I' (Base Integer Instruction Set)">; +def HasStdExtI : Predicate<"Subtarget->hasStdExtI()">, + AssemblerPredicate<(all_of FeatureStdExtI), + "'I' (Base Integer Instruction Set)">; + def FeatureStdExtM : SubtargetFeature<"m", "HasStdExtM", "true", "'M' (Integer Multiplication and Division)">; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -21,12 +21,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/Align(16), - 
/*LocalAreaOffset=*/0, - /*TransientStackAlignment=*/Align(16)), - STI(STI) {} + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -27,6 +27,21 @@ using namespace llvm; +static Align getABIStackAlignment(RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return Align(4); + if (ABI == RISCVABI::ABI_LP64E) + return Align(8); + return Align(16); +} + +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + getABIStackAlignment(STI.getTargetABI()), + /*LocalAreaOffset=*/0, + /*TransientStackAlignment=*/Align(16)), + STI(STI) {} + static const Register AllPopRegs[] = { RISCV::X1, RISCV::X8, RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, RISCV::X24, @@ -497,9 +512,11 @@ // The following calculates the correct offset knowing the number of callee // saved registers spilt by the two methods. if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) { - // Calculate the size of the frame managed by the libcall. The libcalls are - // implemented such that the stack will always be 16 byte aligned. - unsigned LibCallFrameSize = alignTo((STI.getXLen() / 8) * LibCallRegs, 16); + // Calculate the size of the frame managed by the libcall. The stack + // alignment of these libcalls should be the same as how we set it in + // getABIStackAlignment. + unsigned LibCallFrameSize = + alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); RVFI->setLibCallStackSize(LibCallFrameSize); } @@ -974,6 +991,7 @@ // unconditionally save all Caller-saved registers and // all FP registers, regardless whether they are used. 
MachineFrameInfo &MFI = MF.getFrameInfo(); + auto &Subtarget = MF.getSubtarget(); if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) { @@ -985,9 +1003,20 @@ }; for (auto Reg : CSRegs) - SavedRegs.set(Reg); + // Only save x0-x15 for RVE. + if (Reg < RISCV::X16 || !Subtarget.isRVE()) + SavedRegs.set(Reg); + + // According to psABI, if ilp32e/lp64e ABIs are used with an ISA that + // has any of the registers x16-x31 and f0-f31, then these registers are + // considered temporaries, so we should also save x16-x31 here. + if (STI.getTargetABI() == RISCVABI::ABI_ILP32E || + STI.getTargetABI() == RISCVABI::ABI_LP64E) { + for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + SavedRegs.set(Reg); + } - if (MF.getSubtarget().hasStdExtF()) { + if (Subtarget.hasStdExtF()) { // If interrupt is enabled, this list contains all FP registers. const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -995,7 +995,7 @@ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); -ArrayRef getArgGPRs(); +ArrayRef getArgGPRs(const RISCVABI::ABI ABI); } // end namespace RISCV diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -83,9 +83,6 @@ const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRVE()) - report_fatal_error("Codegen not yet implemented for RVE"); - RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); @@ -107,6 +104,8 @@ default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: case RISCVABI::ABI_ILP32F: 
case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -17061,12 +17060,39 @@ RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; -ArrayRef RISCV::getArgGPRs() { - static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, - RISCV::X13, RISCV::X14, RISCV::X15, - RISCV::X16, RISCV::X17}; +ArrayRef RISCV::getArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except + // the ILP32E and LP64E ABIs. + static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; + // The GPRs used for passing arguments in the ILP32E/LP64E ABIs. + static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(ArgEGPRs); + + return ArrayRef(ArgIGPRs); +} + +static ArrayRef getFastCCArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used + // for save-restore libcall, so we don't use them. + static const MCPhysReg FastCCIGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + + // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+ static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X7}; - return ArrayRef(ArgGPRs); + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(FastCCEGPRs); + + return ArrayRef(FastCCIGPRs); } // Pass a 2*XLEN argument that has been split into two XLEN values through @@ -17074,17 +17100,23 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, - ISD::ArgFlagsTy ArgFlags2) { + ISD::ArgFlagsTy ArgFlags2, bool EABI) { unsigned XLenInBytes = XLen / 8; - ArrayRef ArgGPRs = RISCV::getArgGPRs(); + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget(); + ArrayRef ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); } else { // Both halves must be passed on the stack, with proper alignment. - Align StackAlign = - std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); + // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte + // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 
+ Align StackAlign(XLenInBytes); + if (!EABI || XLen != 32) + StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); State.addLoc( CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), State.AllocateStack(XLenInBytes, StackAlign), @@ -17165,7 +17197,9 @@ default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64E: break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: @@ -17197,7 +17231,7 @@ LocInfo = CCValAssign::BCvt; } - ArrayRef ArgGPRs = RISCV::getArgGPRs(); + ArrayRef ArgGPRs = RISCV::getArgGPRs(ABI); // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte @@ -17206,9 +17240,13 @@ // legalisation or not. The argument will not be passed by registers if the // original type is larger than 2*XLEN, so the register alignment rule does // not apply. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && - DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { + DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && + ABI != RISCVABI::ABI_ILP32E) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) @@ -17281,8 +17319,9 @@ ISD::ArgFlagsTy AF = PendingArgFlags[0]; PendingLocs.clear(); PendingArgFlags.clear(); - return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, - ArgFlags); + return CC_RISCVAssign2XLen( + XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, + ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); } // Allocate to a register if possible, or else a stack slot. 
@@ -17608,15 +17647,8 @@ bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional FirstMaskArgument) { - - // X5 and X6 might be used for save-restore libcall. - static const MCPhysReg GPRList[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, - RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, - RISCV::X29, RISCV::X30, RISCV::X31}; - if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17667,7 +17699,7 @@ (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17701,7 +17733,7 @@ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. 
- if (unsigned GPRReg = State.AllocateReg(GPRList)) { + if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -17802,6 +17834,8 @@ case CallingConv::GRAAL: break; case CallingConv::GHC: + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) report_fatal_error("GHC calling convention requires the (Zfinx/F) and " "(Zdinx/D) instruction set extensions"); @@ -17884,7 +17918,7 @@ MF.getInfo()->setIsVectorCall(); if (IsVarArg) { - ArrayRef ArgRegs = RISCV::getArgGPRs(); + ArrayRef ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -18037,9 +18071,11 @@ SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - if (CallConv == CallingConv::GHC) + if (CallConv == CallingConv::GHC) { + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); - else + } else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -63,13 +63,18 @@ if (Subtarget.hasStdExtD()) return CSR_XLEN_F64_Interrupt_SaveList; if (Subtarget.hasStdExtF()) - return CSR_XLEN_F32_Interrupt_SaveList; - return CSR_Interrupt_SaveList; + return Subtarget.isRVE() ? CSR_XLEN_F32_Interrupt_RVE_SaveList + : CSR_XLEN_F32_Interrupt_SaveList; + return Subtarget.isRVE() ? 
CSR_Interrupt_RVE_SaveList + : CSR_Interrupt_SaveList; } switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: + return CSR_ILP32E_LP64E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_SaveList; @@ -109,6 +114,11 @@ // beginning with 'x0' for instructions that take register pairs. markSuperRegs(Reserved, RISCV::DUMMY_REG_PAIR_WITH_X0); + // There are only 16 GPRs for RVE. + if (Subtarget.isRVE()) + for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + markSuperRegs(Reserved, Reg); + // V registers for code generation. We handle them manually. markSuperRegs(Reserved, RISCV::VL); markSuperRegs(Reserved, RISCV::VTYPE); @@ -673,6 +683,9 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: + return CSR_ILP32E_LP64E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_RegMask; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -128,10 +128,20 @@ initializeRISCVPushPopOptPass(*PR); } -static StringRef computeDataLayout(const Triple &TT) { - if (TT.isArch64Bit()) +static StringRef computeDataLayout(const Triple &TT, + const TargetOptions &Options) { + StringRef ABIName = Options.MCOptions.getABIName(); + if (TT.isArch64Bit()) { + if (ABIName == "lp64e") + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64"; + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + } assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + + if (ABIName == "ilp32e") + return "e-m:e-p:32:32-i64:64-n32-S32"; + return "e-m:e-p:32:32-i64:64-n32-S128"; } @@ -146,7 +156,7 @@ std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) - : LLVMTargetMachine(T, 
computeDataLayout(TT), TT, CPU, FS, Options, + : LLVMTargetMachine(T, computeDataLayout(TT, Options), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=LP64 +; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=LP64E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32F ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ @@ -14,8 +18,8 @@ @var = global [32 x float] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, lp64e and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns ; something appropriate. 
@@ -91,6 +95,76 @@ ; ILP32-NEXT: fsw fa5, %lo(var)(a0) ; ILP32-NEXT: ret ; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0: +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: flw fa5, %lo(var)(a0) +; ILP32E-NEXT: flw fa4, %lo(var+4)(a0) +; ILP32E-NEXT: flw fa3, %lo(var+8)(a0) +; ILP32E-NEXT: flw fa2, %lo(var+12)(a0) +; ILP32E-NEXT: addi a1, a0, %lo(var) +; ILP32E-NEXT: flw fa1, 16(a1) +; ILP32E-NEXT: flw fa0, 20(a1) +; ILP32E-NEXT: flw ft0, 24(a1) +; ILP32E-NEXT: flw ft1, 28(a1) +; ILP32E-NEXT: flw ft2, 32(a1) +; ILP32E-NEXT: flw ft3, 36(a1) +; ILP32E-NEXT: flw ft4, 40(a1) +; ILP32E-NEXT: flw ft5, 44(a1) +; ILP32E-NEXT: flw ft6, 48(a1) +; ILP32E-NEXT: flw ft7, 52(a1) +; ILP32E-NEXT: flw fa6, 56(a1) +; ILP32E-NEXT: flw fa7, 60(a1) +; ILP32E-NEXT: flw ft8, 64(a1) +; ILP32E-NEXT: flw ft9, 68(a1) +; ILP32E-NEXT: flw ft10, 72(a1) +; ILP32E-NEXT: flw ft11, 76(a1) +; ILP32E-NEXT: flw fs0, 80(a1) +; ILP32E-NEXT: flw fs1, 84(a1) +; ILP32E-NEXT: flw fs2, 88(a1) +; ILP32E-NEXT: flw fs3, 92(a1) +; ILP32E-NEXT: flw fs4, 96(a1) +; ILP32E-NEXT: flw fs5, 100(a1) +; ILP32E-NEXT: flw fs6, 104(a1) +; ILP32E-NEXT: flw fs7, 108(a1) +; ILP32E-NEXT: flw fs8, 124(a1) +; ILP32E-NEXT: flw fs9, 120(a1) +; ILP32E-NEXT: flw fs10, 116(a1) +; ILP32E-NEXT: flw fs11, 112(a1) +; ILP32E-NEXT: fsw fs8, 124(a1) +; ILP32E-NEXT: fsw fs9, 120(a1) +; ILP32E-NEXT: fsw fs10, 116(a1) +; ILP32E-NEXT: fsw fs11, 112(a1) +; ILP32E-NEXT: fsw fs7, 108(a1) +; ILP32E-NEXT: fsw fs6, 104(a1) +; ILP32E-NEXT: fsw fs5, 100(a1) +; ILP32E-NEXT: fsw fs4, 96(a1) +; ILP32E-NEXT: fsw fs3, 92(a1) +; ILP32E-NEXT: fsw fs2, 88(a1) +; ILP32E-NEXT: fsw fs1, 84(a1) +; ILP32E-NEXT: fsw fs0, 80(a1) +; ILP32E-NEXT: fsw ft11, 76(a1) +; ILP32E-NEXT: fsw ft10, 72(a1) +; ILP32E-NEXT: fsw ft9, 68(a1) +; ILP32E-NEXT: fsw ft8, 64(a1) +; ILP32E-NEXT: fsw fa7, 60(a1) +; ILP32E-NEXT: fsw fa6, 56(a1) +; ILP32E-NEXT: fsw ft7, 52(a1) +; ILP32E-NEXT: fsw ft6, 48(a1) +; ILP32E-NEXT: fsw ft5, 44(a1) +; ILP32E-NEXT: fsw ft4, 40(a1) +; ILP32E-NEXT: fsw ft3, 
36(a1) +; ILP32E-NEXT: fsw ft2, 32(a1) +; ILP32E-NEXT: fsw ft1, 28(a1) +; ILP32E-NEXT: fsw ft0, 24(a1) +; ILP32E-NEXT: fsw fa0, 20(a1) +; ILP32E-NEXT: fsw fa1, 16(a1) +; ILP32E-NEXT: fsw fa2, %lo(var+12)(a0) +; ILP32E-NEXT: fsw fa3, %lo(var+8)(a0) +; ILP32E-NEXT: fsw fa4, %lo(var+4)(a0) +; ILP32E-NEXT: fsw fa5, %lo(var)(a0) +; ILP32E-NEXT: ret +; ; LP64-LABEL: callee: ; LP64: # %bb.0: ; LP64-NEXT: lui a0, %hi(var) @@ -161,6 +235,76 @@ ; LP64-NEXT: fsw fa5, %lo(var)(a0) ; LP64-NEXT: ret ; +; LP64E-LABEL: callee: +; LP64E: # %bb.0: +; LP64E-NEXT: lui a0, %hi(var) +; LP64E-NEXT: flw fa5, %lo(var)(a0) +; LP64E-NEXT: flw fa4, %lo(var+4)(a0) +; LP64E-NEXT: flw fa3, %lo(var+8)(a0) +; LP64E-NEXT: flw fa2, %lo(var+12)(a0) +; LP64E-NEXT: addi a1, a0, %lo(var) +; LP64E-NEXT: flw fa1, 16(a1) +; LP64E-NEXT: flw fa0, 20(a1) +; LP64E-NEXT: flw ft0, 24(a1) +; LP64E-NEXT: flw ft1, 28(a1) +; LP64E-NEXT: flw ft2, 32(a1) +; LP64E-NEXT: flw ft3, 36(a1) +; LP64E-NEXT: flw ft4, 40(a1) +; LP64E-NEXT: flw ft5, 44(a1) +; LP64E-NEXT: flw ft6, 48(a1) +; LP64E-NEXT: flw ft7, 52(a1) +; LP64E-NEXT: flw fa6, 56(a1) +; LP64E-NEXT: flw fa7, 60(a1) +; LP64E-NEXT: flw ft8, 64(a1) +; LP64E-NEXT: flw ft9, 68(a1) +; LP64E-NEXT: flw ft10, 72(a1) +; LP64E-NEXT: flw ft11, 76(a1) +; LP64E-NEXT: flw fs0, 80(a1) +; LP64E-NEXT: flw fs1, 84(a1) +; LP64E-NEXT: flw fs2, 88(a1) +; LP64E-NEXT: flw fs3, 92(a1) +; LP64E-NEXT: flw fs4, 96(a1) +; LP64E-NEXT: flw fs5, 100(a1) +; LP64E-NEXT: flw fs6, 104(a1) +; LP64E-NEXT: flw fs7, 108(a1) +; LP64E-NEXT: flw fs8, 124(a1) +; LP64E-NEXT: flw fs9, 120(a1) +; LP64E-NEXT: flw fs10, 116(a1) +; LP64E-NEXT: flw fs11, 112(a1) +; LP64E-NEXT: fsw fs8, 124(a1) +; LP64E-NEXT: fsw fs9, 120(a1) +; LP64E-NEXT: fsw fs10, 116(a1) +; LP64E-NEXT: fsw fs11, 112(a1) +; LP64E-NEXT: fsw fs7, 108(a1) +; LP64E-NEXT: fsw fs6, 104(a1) +; LP64E-NEXT: fsw fs5, 100(a1) +; LP64E-NEXT: fsw fs4, 96(a1) +; LP64E-NEXT: fsw fs3, 92(a1) +; LP64E-NEXT: fsw fs2, 88(a1) +; LP64E-NEXT: fsw fs1, 84(a1) +; 
LP64E-NEXT: fsw fs0, 80(a1) +; LP64E-NEXT: fsw ft11, 76(a1) +; LP64E-NEXT: fsw ft10, 72(a1) +; LP64E-NEXT: fsw ft9, 68(a1) +; LP64E-NEXT: fsw ft8, 64(a1) +; LP64E-NEXT: fsw fa7, 60(a1) +; LP64E-NEXT: fsw fa6, 56(a1) +; LP64E-NEXT: fsw ft7, 52(a1) +; LP64E-NEXT: fsw ft6, 48(a1) +; LP64E-NEXT: fsw ft5, 44(a1) +; LP64E-NEXT: fsw ft4, 40(a1) +; LP64E-NEXT: fsw ft3, 36(a1) +; LP64E-NEXT: fsw ft2, 32(a1) +; LP64E-NEXT: fsw ft1, 28(a1) +; LP64E-NEXT: fsw ft0, 24(a1) +; LP64E-NEXT: fsw fa0, 20(a1) +; LP64E-NEXT: fsw fa1, 16(a1) +; LP64E-NEXT: fsw fa2, %lo(var+12)(a0) +; LP64E-NEXT: fsw fa3, %lo(var+8)(a0) +; LP64E-NEXT: fsw fa4, %lo(var+4)(a0) +; LP64E-NEXT: fsw fa5, %lo(var)(a0) +; LP64E-NEXT: ret +; ; ILP32F-LABEL: callee: ; ILP32F: # %bb.0: ; ILP32F-NEXT: addi sp, sp, -48 @@ -700,6 +844,149 @@ ; ILP32-NEXT: addi sp, sp, 144 ; ILP32-NEXT: ret ; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NEXT: addi sp, sp, -140 +; ILP32E-NEXT: sw ra, 136(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 132(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s1, 128(sp) # 4-byte Folded Spill +; ILP32E-NEXT: lui s0, %hi(var) +; ILP32E-NEXT: flw fa5, %lo(var)(s0) +; ILP32E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+4)(s0) +; ILP32E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+8)(s0) +; ILP32E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+12)(s0) +; ILP32E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill +; ILP32E-NEXT: addi s1, s0, %lo(var) +; ILP32E-NEXT: flw fa5, 16(s1) +; ILP32E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 20(s1) +; ILP32E-NEXT: fsw fa5, 104(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 24(s1) +; ILP32E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 28(s1) +; ILP32E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 32(s1) +; ILP32E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 
36(s1) +; ILP32E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 40(s1) +; ILP32E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 44(s1) +; ILP32E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 48(s1) +; ILP32E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 52(s1) +; ILP32E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 56(s1) +; ILP32E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 60(s1) +; ILP32E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 64(s1) +; ILP32E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 68(s1) +; ILP32E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 72(s1) +; ILP32E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 76(s1) +; ILP32E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 80(s1) +; ILP32E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 84(s1) +; ILP32E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 88(s1) +; ILP32E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 92(s1) +; ILP32E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 96(s1) +; ILP32E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 100(s1) +; ILP32E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 104(s1) +; ILP32E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 108(s1) +; ILP32E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 112(s1) +; ILP32E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 116(s1) +; ILP32E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 120(s1) +; ILP32E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 124(s1) +; ILP32E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: call callee +; 
ILP32E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 124(s1) +; ILP32E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 120(s1) +; ILP32E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 116(s1) +; ILP32E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 112(s1) +; ILP32E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 108(s1) +; ILP32E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 104(s1) +; ILP32E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 100(s1) +; ILP32E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 96(s1) +; ILP32E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 92(s1) +; ILP32E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 88(s1) +; ILP32E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 84(s1) +; ILP32E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 80(s1) +; ILP32E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 76(s1) +; ILP32E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 72(s1) +; ILP32E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 68(s1) +; ILP32E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 64(s1) +; ILP32E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 60(s1) +; ILP32E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 56(s1) +; ILP32E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 52(s1) +; ILP32E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 48(s1) +; ILP32E-NEXT: flw fa5, 80(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 44(s1) +; ILP32E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 40(s1) +; ILP32E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 
36(s1) +; ILP32E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 32(s1) +; ILP32E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 28(s1) +; ILP32E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 24(s1) +; ILP32E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 20(s1) +; ILP32E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 16(s1) +; ILP32E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+12)(s0) +; ILP32E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+8)(s0) +; ILP32E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+4)(s0) +; ILP32E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var)(s0) +; ILP32E-NEXT: lw ra, 136(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 132(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s1, 128(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 140 +; ILP32E-NEXT: ret +; ; LP64-LABEL: caller: ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -160 @@ -843,6 +1130,149 @@ ; LP64-NEXT: addi sp, sp, 160 ; LP64-NEXT: ret ; +; LP64E-LABEL: caller: +; LP64E: # %bb.0: +; LP64E-NEXT: addi sp, sp, -152 +; LP64E-NEXT: sd ra, 144(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s0, 136(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s1, 128(sp) # 8-byte Folded Spill +; LP64E-NEXT: lui s0, %hi(var) +; LP64E-NEXT: flw fa5, %lo(var)(s0) +; LP64E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+4)(s0) +; LP64E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+8)(s0) +; LP64E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+12)(s0) +; LP64E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill +; LP64E-NEXT: addi s1, s0, %lo(var) +; LP64E-NEXT: flw fa5, 16(s1) +; LP64E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 20(s1) +; LP64E-NEXT: fsw fa5, 
104(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 24(s1) +; LP64E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 28(s1) +; LP64E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 32(s1) +; LP64E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 36(s1) +; LP64E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 40(s1) +; LP64E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 44(s1) +; LP64E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 48(s1) +; LP64E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 52(s1) +; LP64E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 56(s1) +; LP64E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 60(s1) +; LP64E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 64(s1) +; LP64E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 68(s1) +; LP64E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 72(s1) +; LP64E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 76(s1) +; LP64E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 80(s1) +; LP64E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 84(s1) +; LP64E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 88(s1) +; LP64E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 92(s1) +; LP64E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 96(s1) +; LP64E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 100(s1) +; LP64E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 104(s1) +; LP64E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 108(s1) +; LP64E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 112(s1) +; LP64E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 116(s1) 
+; LP64E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 120(s1) +; LP64E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 124(s1) +; LP64E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill +; LP64E-NEXT: call callee +; LP64E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 124(s1) +; LP64E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 120(s1) +; LP64E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 116(s1) +; LP64E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 112(s1) +; LP64E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 108(s1) +; LP64E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 104(s1) +; LP64E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 100(s1) +; LP64E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 96(s1) +; LP64E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 92(s1) +; LP64E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 88(s1) +; LP64E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 84(s1) +; LP64E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 80(s1) +; LP64E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 76(s1) +; LP64E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 72(s1) +; LP64E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 68(s1) +; LP64E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 64(s1) +; LP64E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 60(s1) +; LP64E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 56(s1) +; LP64E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 52(s1) +; LP64E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 48(s1) +; LP64E-NEXT: flw fa5, 80(sp) # 4-byte Folded 
Reload +; LP64E-NEXT: fsw fa5, 44(s1) +; LP64E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 40(s1) +; LP64E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 36(s1) +; LP64E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 32(s1) +; LP64E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 28(s1) +; LP64E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 24(s1) +; LP64E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 20(s1) +; LP64E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 16(s1) +; LP64E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+12)(s0) +; LP64E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+8)(s0) +; LP64E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+4)(s0) +; LP64E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var)(s0) +; LP64E-NEXT: ld ra, 144(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s0, 136(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s1, 128(sp) # 8-byte Folded Reload +; LP64E-NEXT: addi sp, sp, 152 +; LP64E-NEXT: ret +; ; ILP32F-LABEL: caller: ; ILP32F: # %bb.0: ; ILP32F-NEXT: addi sp, sp, -144 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck %s -check-prefix=ILP32 ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs< %s \ ; RUN: | FileCheck %s -check-prefix=LP64 +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64e -verify-machineinstrs< %s \ +; RUN: | FileCheck %s -check-prefix=LP64E ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32D ; RUN: llc -mtriple=riscv64 -mattr=+d 
-target-abi lp64d -verify-machineinstrs < %s \ @@ -10,7 +12,7 @@ @var = global [32 x double] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. +; All floating point registers are temporaries for the ilp32, lp64e and lp64 ABIs. ; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns @@ -157,6 +159,76 @@ ; LP64-NEXT: fsd fa5, %lo(var)(a0) ; LP64-NEXT: ret ; +; LP64E-LABEL: callee: +; LP64E: # %bb.0: +; LP64E-NEXT: lui a0, %hi(var) +; LP64E-NEXT: fld fa5, %lo(var)(a0) +; LP64E-NEXT: fld fa4, %lo(var+8)(a0) +; LP64E-NEXT: addi a1, a0, %lo(var) +; LP64E-NEXT: fld fa3, 16(a1) +; LP64E-NEXT: fld fa2, 24(a1) +; LP64E-NEXT: fld fa1, 32(a1) +; LP64E-NEXT: fld fa0, 40(a1) +; LP64E-NEXT: fld ft0, 48(a1) +; LP64E-NEXT: fld ft1, 56(a1) +; LP64E-NEXT: fld ft2, 64(a1) +; LP64E-NEXT: fld ft3, 72(a1) +; LP64E-NEXT: fld ft4, 80(a1) +; LP64E-NEXT: fld ft5, 88(a1) +; LP64E-NEXT: fld ft6, 96(a1) +; LP64E-NEXT: fld ft7, 104(a1) +; LP64E-NEXT: fld fa6, 112(a1) +; LP64E-NEXT: fld fa7, 120(a1) +; LP64E-NEXT: fld ft8, 128(a1) +; LP64E-NEXT: fld ft9, 136(a1) +; LP64E-NEXT: fld ft10, 144(a1) +; LP64E-NEXT: fld ft11, 152(a1) +; LP64E-NEXT: fld fs0, 160(a1) +; LP64E-NEXT: fld fs1, 168(a1) +; LP64E-NEXT: fld fs2, 176(a1) +; LP64E-NEXT: fld fs3, 184(a1) +; LP64E-NEXT: fld fs4, 192(a1) +; LP64E-NEXT: fld fs5, 200(a1) +; LP64E-NEXT: fld fs6, 208(a1) +; LP64E-NEXT: fld fs7, 216(a1) +; LP64E-NEXT: fld fs8, 248(a1) +; LP64E-NEXT: fld fs9, 240(a1) +; LP64E-NEXT: fld fs10, 232(a1) +; LP64E-NEXT: fld fs11, 224(a1) +; LP64E-NEXT: fsd fs8, 248(a1) +; LP64E-NEXT: fsd fs9, 240(a1) +; LP64E-NEXT: fsd fs10, 232(a1) +; LP64E-NEXT: fsd fs11, 224(a1) +; LP64E-NEXT: fsd fs7, 216(a1) +; LP64E-NEXT: fsd fs6, 208(a1) +; LP64E-NEXT: fsd fs5, 200(a1) +; LP64E-NEXT: fsd fs4, 192(a1) +; LP64E-NEXT: fsd fs3, 184(a1) +; LP64E-NEXT: fsd fs2, 176(a1) +; LP64E-NEXT: fsd fs1, 168(a1) +; 
LP64E-NEXT: fsd fs0, 160(a1) +; LP64E-NEXT: fsd ft11, 152(a1) +; LP64E-NEXT: fsd ft10, 144(a1) +; LP64E-NEXT: fsd ft9, 136(a1) +; LP64E-NEXT: fsd ft8, 128(a1) +; LP64E-NEXT: fsd fa7, 120(a1) +; LP64E-NEXT: fsd fa6, 112(a1) +; LP64E-NEXT: fsd ft7, 104(a1) +; LP64E-NEXT: fsd ft6, 96(a1) +; LP64E-NEXT: fsd ft5, 88(a1) +; LP64E-NEXT: fsd ft4, 80(a1) +; LP64E-NEXT: fsd ft3, 72(a1) +; LP64E-NEXT: fsd ft2, 64(a1) +; LP64E-NEXT: fsd ft1, 56(a1) +; LP64E-NEXT: fsd ft0, 48(a1) +; LP64E-NEXT: fsd fa0, 40(a1) +; LP64E-NEXT: fsd fa1, 32(a1) +; LP64E-NEXT: fsd fa2, 24(a1) +; LP64E-NEXT: fsd fa3, 16(a1) +; LP64E-NEXT: fsd fa4, %lo(var+8)(a0) +; LP64E-NEXT: fsd fa5, %lo(var)(a0) +; LP64E-NEXT: ret +; ; ILP32D-LABEL: callee: ; ILP32D: # %bb.0: ; ILP32D-NEXT: addi sp, sp, -96 @@ -647,6 +719,149 @@ ; LP64-NEXT: addi sp, sp, 288 ; LP64-NEXT: ret ; +; LP64E-LABEL: caller: +; LP64E: # %bb.0: +; LP64E-NEXT: addi sp, sp, -280 +; LP64E-NEXT: sd ra, 272(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s0, 264(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s1, 256(sp) # 8-byte Folded Spill +; LP64E-NEXT: lui s0, %hi(var) +; LP64E-NEXT: fld fa5, %lo(var)(s0) +; LP64E-NEXT: fsd fa5, 248(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, %lo(var+8)(s0) +; LP64E-NEXT: fsd fa5, 240(sp) # 8-byte Folded Spill +; LP64E-NEXT: addi s1, s0, %lo(var) +; LP64E-NEXT: fld fa5, 16(s1) +; LP64E-NEXT: fsd fa5, 232(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 24(s1) +; LP64E-NEXT: fsd fa5, 224(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 32(s1) +; LP64E-NEXT: fsd fa5, 216(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 40(s1) +; LP64E-NEXT: fsd fa5, 208(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 48(s1) +; LP64E-NEXT: fsd fa5, 200(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 56(s1) +; LP64E-NEXT: fsd fa5, 192(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 64(s1) +; LP64E-NEXT: fsd fa5, 184(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 72(s1) +; LP64E-NEXT: fsd fa5, 176(sp) # 8-byte Folded 
Spill +; LP64E-NEXT: fld fa5, 80(s1) +; LP64E-NEXT: fsd fa5, 168(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 88(s1) +; LP64E-NEXT: fsd fa5, 160(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 96(s1) +; LP64E-NEXT: fsd fa5, 152(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 104(s1) +; LP64E-NEXT: fsd fa5, 144(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 112(s1) +; LP64E-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 120(s1) +; LP64E-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 128(s1) +; LP64E-NEXT: fsd fa5, 120(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 136(s1) +; LP64E-NEXT: fsd fa5, 112(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 144(s1) +; LP64E-NEXT: fsd fa5, 104(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 152(s1) +; LP64E-NEXT: fsd fa5, 96(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 160(s1) +; LP64E-NEXT: fsd fa5, 88(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 168(s1) +; LP64E-NEXT: fsd fa5, 80(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 176(s1) +; LP64E-NEXT: fsd fa5, 72(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 184(s1) +; LP64E-NEXT: fsd fa5, 64(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 192(s1) +; LP64E-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 200(s1) +; LP64E-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 208(s1) +; LP64E-NEXT: fsd fa5, 40(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 216(s1) +; LP64E-NEXT: fsd fa5, 32(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 224(s1) +; LP64E-NEXT: fsd fa5, 24(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 232(s1) +; LP64E-NEXT: fsd fa5, 16(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 240(s1) +; LP64E-NEXT: fsd fa5, 8(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 248(s1) +; LP64E-NEXT: fsd fa5, 0(sp) # 8-byte Folded Spill +; LP64E-NEXT: call callee +; LP64E-NEXT: fld fa5, 0(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 248(s1) +; 
LP64E-NEXT: fld fa5, 8(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 240(s1) +; LP64E-NEXT: fld fa5, 16(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 232(s1) +; LP64E-NEXT: fld fa5, 24(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 224(s1) +; LP64E-NEXT: fld fa5, 32(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 216(s1) +; LP64E-NEXT: fld fa5, 40(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 208(s1) +; LP64E-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 200(s1) +; LP64E-NEXT: fld fa5, 56(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 192(s1) +; LP64E-NEXT: fld fa5, 64(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 184(s1) +; LP64E-NEXT: fld fa5, 72(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 176(s1) +; LP64E-NEXT: fld fa5, 80(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 168(s1) +; LP64E-NEXT: fld fa5, 88(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 160(s1) +; LP64E-NEXT: fld fa5, 96(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 152(s1) +; LP64E-NEXT: fld fa5, 104(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 144(s1) +; LP64E-NEXT: fld fa5, 112(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 136(s1) +; LP64E-NEXT: fld fa5, 120(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 128(s1) +; LP64E-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 120(s1) +; LP64E-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 112(s1) +; LP64E-NEXT: fld fa5, 144(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 104(s1) +; LP64E-NEXT: fld fa5, 152(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 96(s1) +; LP64E-NEXT: fld fa5, 160(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 88(s1) +; LP64E-NEXT: fld fa5, 168(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 80(s1) +; LP64E-NEXT: fld fa5, 176(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 72(s1) +; LP64E-NEXT: fld fa5, 184(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 64(s1) +; LP64E-NEXT: fld 
fa5, 192(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 56(s1) +; LP64E-NEXT: fld fa5, 200(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 48(s1) +; LP64E-NEXT: fld fa5, 208(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 40(s1) +; LP64E-NEXT: fld fa5, 216(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 32(s1) +; LP64E-NEXT: fld fa5, 224(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 24(s1) +; LP64E-NEXT: fld fa5, 232(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 16(s1) +; LP64E-NEXT: fld fa5, 240(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, %lo(var+8)(s0) +; LP64E-NEXT: fld fa5, 248(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, %lo(var)(s0) +; LP64E-NEXT: ld ra, 272(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s0, 264(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s1, 256(sp) # 8-byte Folded Reload +; LP64E-NEXT: addi sp, sp, 280 +; LP64E-NEXT: ret +; ; ILP32D-LABEL: caller: ; ILP32D: # %bb.0: ; ILP32D-NEXT: addi sp, sp, -272 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \ @@ -15,6 +17,8 @@ ; RUN: -frame-pointer=all < %s | FileCheck %s -check-prefixes=RV32IZCMP-WITH-FP ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | 
FileCheck %s -check-prefix=RV64I-LP64E ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f -verify-machineinstrs < %s \ @@ -144,6 +148,96 @@ ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: callee: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -48 +; RV32I-ILP32E-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a6, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a6) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a6) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a6) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a6) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi a5, a6, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(a5) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(a5) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw t0, 24(a5) +; RV32I-ILP32E-NEXT: lw t1, 28(a5) +; RV32I-ILP32E-NEXT: lw t2, 32(a5) +; RV32I-ILP32E-NEXT: lw t3, 36(a5) +; RV32I-ILP32E-NEXT: lw t4, 40(a5) +; RV32I-ILP32E-NEXT: lw t5, 44(a5) +; RV32I-ILP32E-NEXT: lw t6, 48(a5) +; RV32I-ILP32E-NEXT: lw s2, 52(a5) +; RV32I-ILP32E-NEXT: lw s3, 56(a5) +; RV32I-ILP32E-NEXT: lw s4, 60(a5) +; RV32I-ILP32E-NEXT: lw s5, 64(a5) +; RV32I-ILP32E-NEXT: lw s6, 68(a5) +; RV32I-ILP32E-NEXT: lw s7, 72(a5) +; RV32I-ILP32E-NEXT: lw s8, 76(a5) +; RV32I-ILP32E-NEXT: lw s9, 80(a5) +; RV32I-ILP32E-NEXT: lw s10, 84(a5) +; RV32I-ILP32E-NEXT: lw s11, 88(a5) +; RV32I-ILP32E-NEXT: lw s0, 92(a5) +; RV32I-ILP32E-NEXT: lw s1, 96(a5) +; RV32I-ILP32E-NEXT: lw ra, 100(a5) +; 
RV32I-ILP32E-NEXT: lw a7, 104(a5) +; RV32I-ILP32E-NEXT: lw a4, 108(a5) +; RV32I-ILP32E-NEXT: lw a0, 124(a5) +; RV32I-ILP32E-NEXT: lw a1, 120(a5) +; RV32I-ILP32E-NEXT: lw a2, 116(a5) +; RV32I-ILP32E-NEXT: lw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a0, 124(a5) +; RV32I-ILP32E-NEXT: sw a1, 120(a5) +; RV32I-ILP32E-NEXT: sw a2, 116(a5) +; RV32I-ILP32E-NEXT: sw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a4, 108(a5) +; RV32I-ILP32E-NEXT: sw a7, 104(a5) +; RV32I-ILP32E-NEXT: sw ra, 100(a5) +; RV32I-ILP32E-NEXT: sw s1, 96(a5) +; RV32I-ILP32E-NEXT: sw s0, 92(a5) +; RV32I-ILP32E-NEXT: sw s11, 88(a5) +; RV32I-ILP32E-NEXT: sw s10, 84(a5) +; RV32I-ILP32E-NEXT: sw s9, 80(a5) +; RV32I-ILP32E-NEXT: sw s8, 76(a5) +; RV32I-ILP32E-NEXT: sw s7, 72(a5) +; RV32I-ILP32E-NEXT: sw s6, 68(a5) +; RV32I-ILP32E-NEXT: sw s5, 64(a5) +; RV32I-ILP32E-NEXT: sw s4, 60(a5) +; RV32I-ILP32E-NEXT: sw s3, 56(a5) +; RV32I-ILP32E-NEXT: sw s2, 52(a5) +; RV32I-ILP32E-NEXT: sw t6, 48(a5) +; RV32I-ILP32E-NEXT: sw t5, 44(a5) +; RV32I-ILP32E-NEXT: sw t4, 40(a5) +; RV32I-ILP32E-NEXT: sw t3, 36(a5) +; RV32I-ILP32E-NEXT: sw t2, 32(a5) +; RV32I-ILP32E-NEXT: sw t1, 28(a5) +; RV32I-ILP32E-NEXT: sw t0, 24(a5) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(a5) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(a5) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a6) +; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a6) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a6) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a6) +; RV32I-ILP32E-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 48 +; 
RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -80 @@ -563,6 +657,96 @@ ; RV64I-NEXT: addi sp, sp, 160 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: callee: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -80 +; RV64I-LP64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lui a6, %hi(var) +; RV64I-LP64E-NEXT: lw a0, %lo(var)(a6) +; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a6) +; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a6) +; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a6) +; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: addi a5, a6, %lo(var) +; RV64I-LP64E-NEXT: lw a0, 16(a5) +; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 20(a5) +; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw t0, 24(a5) +; RV64I-LP64E-NEXT: lw t1, 28(a5) +; RV64I-LP64E-NEXT: lw t2, 32(a5) +; RV64I-LP64E-NEXT: lw t3, 36(a5) +; RV64I-LP64E-NEXT: lw t4, 40(a5) +; RV64I-LP64E-NEXT: lw t5, 44(a5) +; RV64I-LP64E-NEXT: lw t6, 48(a5) +; RV64I-LP64E-NEXT: lw s2, 52(a5) +; RV64I-LP64E-NEXT: lw s3, 56(a5) +; RV64I-LP64E-NEXT: lw s4, 60(a5) +; RV64I-LP64E-NEXT: lw s5, 64(a5) +; RV64I-LP64E-NEXT: lw s6, 68(a5) +; RV64I-LP64E-NEXT: lw s7, 72(a5) +; RV64I-LP64E-NEXT: lw s8, 76(a5) +; RV64I-LP64E-NEXT: lw s9, 80(a5) +; RV64I-LP64E-NEXT: lw s10, 84(a5) +; RV64I-LP64E-NEXT: lw s11, 88(a5) +; RV64I-LP64E-NEXT: lw s0, 92(a5) +; RV64I-LP64E-NEXT: lw s1, 96(a5) +; RV64I-LP64E-NEXT: lw ra, 100(a5) +; RV64I-LP64E-NEXT: lw a7, 104(a5) +; RV64I-LP64E-NEXT: lw a4, 108(a5) +; RV64I-LP64E-NEXT: lw a0, 124(a5) +; RV64I-LP64E-NEXT: lw a1, 120(a5) +; RV64I-LP64E-NEXT: lw a2, 
116(a5) +; RV64I-LP64E-NEXT: lw a3, 112(a5) +; RV64I-LP64E-NEXT: sw a0, 124(a5) +; RV64I-LP64E-NEXT: sw a1, 120(a5) +; RV64I-LP64E-NEXT: sw a2, 116(a5) +; RV64I-LP64E-NEXT: sw a3, 112(a5) +; RV64I-LP64E-NEXT: sw a4, 108(a5) +; RV64I-LP64E-NEXT: sw a7, 104(a5) +; RV64I-LP64E-NEXT: sw ra, 100(a5) +; RV64I-LP64E-NEXT: sw s1, 96(a5) +; RV64I-LP64E-NEXT: sw s0, 92(a5) +; RV64I-LP64E-NEXT: sw s11, 88(a5) +; RV64I-LP64E-NEXT: sw s10, 84(a5) +; RV64I-LP64E-NEXT: sw s9, 80(a5) +; RV64I-LP64E-NEXT: sw s8, 76(a5) +; RV64I-LP64E-NEXT: sw s7, 72(a5) +; RV64I-LP64E-NEXT: sw s6, 68(a5) +; RV64I-LP64E-NEXT: sw s5, 64(a5) +; RV64I-LP64E-NEXT: sw s4, 60(a5) +; RV64I-LP64E-NEXT: sw s3, 56(a5) +; RV64I-LP64E-NEXT: sw s2, 52(a5) +; RV64I-LP64E-NEXT: sw t6, 48(a5) +; RV64I-LP64E-NEXT: sw t5, 44(a5) +; RV64I-LP64E-NEXT: sw t4, 40(a5) +; RV64I-LP64E-NEXT: sw t3, 36(a5) +; RV64I-LP64E-NEXT: sw t2, 32(a5) +; RV64I-LP64E-NEXT: sw t1, 28(a5) +; RV64I-LP64E-NEXT: sw t0, 24(a5) +; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 20(a5) +; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 16(a5) +; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a6) +; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a6) +; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a6) +; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a6) +; RV64I-LP64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 80 +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: callee: ; RV64I-WITH-FP: # %bb.0: ; RV64I-WITH-FP-NEXT: addi sp, sp, -160 @@ -1023,6 +1207,148 @@ ; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; 
RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -136 +; RV32I-ILP32E-NEXT: sw ra, 132(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 128(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+4)(a0) +; RV32I-ILP32E-NEXT: sw a1, 116(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+8)(a0) +; RV32I-ILP32E-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+12)(a0) +; RV32I-ILP32E-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s1, a0, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(s1) +; RV32I-ILP32E-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(s1) +; RV32I-ILP32E-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 24(s1) +; RV32I-ILP32E-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 28(s1) +; RV32I-ILP32E-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 32(s1) +; RV32I-ILP32E-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 36(s1) +; RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 40(s1) +; RV32I-ILP32E-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 44(s1) +; RV32I-ILP32E-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 48(s1) +; RV32I-ILP32E-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 52(s1) +; RV32I-ILP32E-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 56(s1) +; RV32I-ILP32E-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 60(s1) +; RV32I-ILP32E-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 64(s1) +; RV32I-ILP32E-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 68(s1) +; 
RV32I-ILP32E-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 72(s1) +; RV32I-ILP32E-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 76(s1) +; RV32I-ILP32E-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 80(s1) +; RV32I-ILP32E-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 84(s1) +; RV32I-ILP32E-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 88(s1) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 92(s1) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 96(s1) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 100(s1) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 104(s1) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 108(s1) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 112(s1) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 116(s1) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 120(s1) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw s0, 124(s1) +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: sw s0, 124(s1) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 120(s1) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 116(s1) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 112(s1) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 108(s1) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 104(s1) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 100(s1) +; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 
4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 96(s1) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 92(s1) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 88(s1) +; RV32I-ILP32E-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 84(s1) +; RV32I-ILP32E-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 80(s1) +; RV32I-ILP32E-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 76(s1) +; RV32I-ILP32E-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 72(s1) +; RV32I-ILP32E-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 68(s1) +; RV32I-ILP32E-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 64(s1) +; RV32I-ILP32E-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 60(s1) +; RV32I-ILP32E-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 56(s1) +; RV32I-ILP32E-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 52(s1) +; RV32I-ILP32E-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 48(s1) +; RV32I-ILP32E-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 44(s1) +; RV32I-ILP32E-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 40(s1) +; RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 36(s1) +; RV32I-ILP32E-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 32(s1) +; RV32I-ILP32E-NEXT: lw a0, 92(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 28(s1) +; RV32I-ILP32E-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 24(s1) +; RV32I-ILP32E-NEXT: lw a0, 100(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(s1) +; RV32I-ILP32E-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(s1) +; RV32I-ILP32E-NEXT: lui a1, %hi(var) 
+; RV32I-ILP32E-NEXT: lw a0, 108(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a1) +; RV32I-ILP32E-NEXT: lw a0, 112(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a1) +; RV32I-ILP32E-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a1) +; RV32I-ILP32E-NEXT: lw a0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a1) +; RV32I-ILP32E-NEXT: lw ra, 132(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 128(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 136 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -144 @@ -1576,6 +1902,148 @@ ; RV64I-NEXT: addi sp, sp, 288 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: caller: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -272 +; RV64I-LP64E-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 248(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lui a0, %hi(var) +; RV64I-LP64E-NEXT: lw a1, %lo(var)(a0) +; RV64I-LP64E-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+4)(a0) +; RV64I-LP64E-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+8)(a0) +; RV64I-LP64E-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+12)(a0) +; RV64I-LP64E-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: addi s1, a0, %lo(var) +; RV64I-LP64E-NEXT: lw a0, 16(s1) +; RV64I-LP64E-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 20(s1) +; RV64I-LP64E-NEXT: sd a0, 200(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 24(s1) +; RV64I-LP64E-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 28(s1) +; RV64I-LP64E-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 32(s1) +; 
RV64I-LP64E-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 36(s1) +; RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 40(s1) +; RV64I-LP64E-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 44(s1) +; RV64I-LP64E-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 48(s1) +; RV64I-LP64E-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 52(s1) +; RV64I-LP64E-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 56(s1) +; RV64I-LP64E-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 60(s1) +; RV64I-LP64E-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 64(s1) +; RV64I-LP64E-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 68(s1) +; RV64I-LP64E-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 72(s1) +; RV64I-LP64E-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 76(s1) +; RV64I-LP64E-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 80(s1) +; RV64I-LP64E-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 84(s1) +; RV64I-LP64E-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 88(s1) +; RV64I-LP64E-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 92(s1) +; RV64I-LP64E-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 96(s1) +; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 100(s1) +; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 104(s1) +; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 108(s1) +; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 112(s1) +; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 116(s1) +; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill 
+; RV64I-LP64E-NEXT: lw a0, 120(s1) +; RV64I-LP64E-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw s0, 124(s1) +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: sw s0, 124(s1) +; RV64I-LP64E-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 120(s1) +; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 116(s1) +; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 112(s1) +; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 108(s1) +; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 104(s1) +; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 100(s1) +; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 96(s1) +; RV64I-LP64E-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 92(s1) +; RV64I-LP64E-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 88(s1) +; RV64I-LP64E-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 84(s1) +; RV64I-LP64E-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 80(s1) +; RV64I-LP64E-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 76(s1) +; RV64I-LP64E-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 72(s1) +; RV64I-LP64E-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 68(s1) +; RV64I-LP64E-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 64(s1) +; RV64I-LP64E-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 60(s1) +; RV64I-LP64E-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 56(s1) +; RV64I-LP64E-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 52(s1) +; RV64I-LP64E-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 48(s1) +; RV64I-LP64E-NEXT: ld 
a0, 152(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 44(s1) +; RV64I-LP64E-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 40(s1) +; RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 36(s1) +; RV64I-LP64E-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 32(s1) +; RV64I-LP64E-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 28(s1) +; RV64I-LP64E-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 24(s1) +; RV64I-LP64E-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 20(s1) +; RV64I-LP64E-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 16(s1) +; RV64I-LP64E-NEXT: lui a1, %hi(var) +; RV64I-LP64E-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a1) +; RV64I-LP64E-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a1) +; RV64I-LP64E-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a1) +; RV64I-LP64E-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a1) +; RV64I-LP64E-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 248(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 272 +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: caller: ; RV64I-WITH-FP: # %bb.0: ; RV64I-WITH-FP-NEXT: addi sp, sp, -288 @@ -2007,6 +2475,13 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: foo: +; RV32I-ILP32E: # %bb.0: # %entry +; RV32I-ILP32E-NEXT: #APP +; RV32I-ILP32E-NEXT: li s4, 0 +; RV32I-ILP32E-NEXT: #NO_APP +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: foo: ; RV32I-WITH-FP: # %bb.0: # %entry ; RV32I-WITH-FP-NEXT: addi sp, sp, -16 @@ -2072,6 +2547,13 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: foo: +; RV64I-LP64E: # %bb.0: # 
%entry +; RV64I-LP64E-NEXT: #APP +; RV64I-LP64E-NEXT: li s4, 0 +; RV64I-LP64E-NEXT: #NO_APP +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: foo: ; RV64I-WITH-FP: # %bb.0: # %entry ; RV64I-WITH-FP-NEXT: addi sp, sp, -32 @@ -2143,6 +2625,13 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: bar: +; RV32I-ILP32E: # %bb.0: # %entry +; RV32I-ILP32E-NEXT: #APP +; RV32I-ILP32E-NEXT: li s11, 0 +; RV32I-ILP32E-NEXT: #NO_APP +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: bar: ; RV32I-WITH-FP: # %bb.0: # %entry ; RV32I-WITH-FP-NEXT: addi sp, sp, -16 @@ -2208,6 +2697,13 @@ ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: bar: +; RV64I-LP64E: # %bb.0: # %entry +; RV64I-LP64E-NEXT: #APP +; RV64I-LP64E-NEXT: li s11, 0 +; RV64I-LP64E-NEXT: #NO_APP +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: bar: ; RV64I-WITH-FP: # %bb.0: # %entry ; RV64I-WITH-FP-NEXT: addi sp, sp, -32 @@ -2284,6 +2780,23 @@ ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: varargs: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -28 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28 +; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -28 +; RV32I-ILP32E-NEXT: sw a5, 24(sp) +; RV32I-ILP32E-NEXT: sw a4, 20(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 28 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: varargs: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -48 @@ -2370,6 +2883,23 @@ ; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: varargs: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -56 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 56 +; RV64I-LP64E-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: 
.cfi_offset ra, -56 +; RV64I-LP64E-NEXT: sd a5, 48(sp) +; RV64I-LP64E-NEXT: sd a4, 40(sp) +; RV64I-LP64E-NEXT: sd a3, 32(sp) +; RV64I-LP64E-NEXT: sd a2, 24(sp) +; RV64I-LP64E-NEXT: sd a1, 16(sp) +; RV64I-LP64E-NEXT: sd a0, 8(sp) +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 56 +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: varargs: ; RV64I-WITH-FP: # %bb.0: ; RV64I-WITH-FP-NEXT: addi sp, sp, -80 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,2549 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM-SAVE-RESTORE %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP-SAVE-RESTORE %s + +; This file contains tests that will have differing output for the ilp32e ABIs. 
+ +define i32 @callee_float_in_regs(i32 %a, float %b) { +; ILP32E-FPELIM-LABEL: callee_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: call __fixsfsi +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: call __fixsfsi +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixsfsi +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixsfsi +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2 + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() { +; ILP32E-FPELIM-LABEL: caller_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a1, 262144 +; ILP32E-FPELIM-NEXT: call callee_float_in_regs +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; 
ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a1, 262144 +; ILP32E-WITHFP-NEXT: call callee_float_in_regs +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 262144 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_in_regs +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 262144 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_in_regs +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) { +; ILP32E-FPELIM-LABEL: callee_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-NEXT: lw a1, 0(sp) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, 
sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_on_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_on_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i32 @caller_float_on_stack() { +; ILP32E-FPELIM-LABEL: caller_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: lui a0, 
264704 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a2, 2 +; ILP32E-FPELIM-NEXT: li a4, 3 +; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: li a3, 0 +; ILP32E-FPELIM-NEXT: li a5, 0 +; ILP32E-FPELIM-NEXT: call callee_float_on_stack +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -20 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 264704 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a2, 2 +; ILP32E-WITHFP-NEXT: li a4, 3 +; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: li a3, 0 +; ILP32E-WITHFP-NEXT: li a5, 0 +; ILP32E-WITHFP-NEXT: call callee_float_on_stack +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 20 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_on_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 264704 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 
8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_on_stack +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_on_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 264704 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_on_stack +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; 
ILP32E-FPELIM-NEXT: lui a0, 260096 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 260096 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 260096 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 260096 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_tiny_scalar_ret +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: 
caller_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_double_in_regs(i32 %a, double %b) { +; ILP32E-FPELIM-LABEL: callee_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: mv a1, a2 +; ILP32E-FPELIM-NEXT: call __fixdfsi +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: mv a1, a2 +; ILP32E-WITHFP-NEXT: call __fixdfsi +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_double_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: 
.cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixdfsi +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_double_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixdfsi +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2 + %b_fptosi = fptosi double %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_regs() { +; ILP32E-FPELIM-LABEL: caller_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a2, 262144 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call callee_double_in_regs +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; 
ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call callee_double_in_regs +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_double_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a2, 262144 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_double_in_regs +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_double_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a2, 262144 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_double_in_regs +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_double_in_regs(i32 1, double 2.0) + ret i32 %1 +} + +; Check 2x*xlen values are aligned appropriately when passed on the 
stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) { +; ilp32e only aligns the stack to 4 bytes, so both the double and the +; two-element array should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: callee_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_aligned_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_aligned_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 %3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() { +; ilp32e only aligns the stack to 4 bytes, 
so both the double and the +; two-element array should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: caller_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: li a0, 18 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: li a0, 16 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 15 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: li a0, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-NEXT: li a3, 12 +; ILP32E-FPELIM-NEXT: li a4, 13 +; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: call callee_aligned_stack +; ILP32E-FPELIM-NEXT: addi sp, 
s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 18 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 
13 +; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: call callee_aligned_stack +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_aligned_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 17 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262236 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 377487 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 15 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 14 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -1967 
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 688509 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_aligned_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 17 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262236 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 377487 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: 
addi a0, a0, 1475 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 15 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 14 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 688509 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +define double @callee_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 261888 +; ILP32E-FPELIM-NEXT: li a0, 0 +; ILP32E-FPELIM-NEXT: ret +; +; 
ILP32E-WITHFP-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 261888 +; ILP32E-WITHFP-NEXT: li a0, 0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 261888 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 261888 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret double 1.0 +} + +define i64 @caller_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, 
sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_scalar_ret +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_scalar_ret +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call double @callee_small_scalar_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +; Check that on RV32, i64 is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. 
+ +define i32 @callee_i64_in_regs(i32 %a, i64 %b) { +; ILP32E-FPELIM-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %b_trunc = trunc i64 %b to i32 + %1 = add i32 %a, %b_trunc + ret i32 %1 +} + +define i32 @caller_i64_in_regs() { +; ILP32E-FPELIM-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; 
ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 0 +; ILP32E-FPELIM-NEXT: call callee_i64_in_regs +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call callee_i64_in_regs +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_i64_in_regs +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 
1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_i64_in_regs +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_i64_in_regs(i32 1, i64 2) + ret i32 %1 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) { +; ILP32E-FPELIM-LABEL: callee_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 12(sp) +; ILP32E-FPELIM-NEXT: lw a7, 0(sp) +; ILP32E-FPELIM-NEXT: lw t0, 4(sp) +; ILP32E-FPELIM-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-NEXT: andi a0, a0, 255 +; ILP32E-FPELIM-NEXT: slli a1, a1, 16 +; ILP32E-FPELIM-NEXT: srli a1, a1, 16 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a6, 12(s0) +; ILP32E-WITHFP-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-NEXT: lw t0, 4(s0) +; ILP32E-WITHFP-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-NEXT: andi a0, a0, 255 +; ILP32E-WITHFP-NEXT: slli a1, a1, 16 +; ILP32E-WITHFP-NEXT: srli a1, a1, 16 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a4, t1 +; 
ILP32E-WITHFP-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_many_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi a0, a0, 255 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: slli a1, a1, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: srli a1, a1, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_many_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(s0) +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t0, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi a0, a0, 255 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: slli a1, a1, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: srli a1, a1, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret i32 %8 +} + +define i32 @caller_many_scalars() { +; ILP32E-FPELIM-LABEL: caller_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -20 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 8 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a4, 6 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a5, 5 +; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: li a4, 0 +; ILP32E-FPELIM-NEXT: call callee_many_scalars +; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 20 +; ILP32E-FPELIM-NEXT: ret +; +; 
ILP32E-WITHFP-LABEL: caller_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a4, 6 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a5, 5 +; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: li a4, 0 +; ILP32E-WITHFP-NEXT: call callee_many_scalars +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_many_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_many_scalars +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_many_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) { +; ILP32E-FPELIM-LABEL: callee_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a2, 0(a1) +; ILP32E-FPELIM-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-NEXT: lw a7, 4(a0) +; 
ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a2, 0(a1) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a6, 12(a0) +; ILP32E-WITHFP-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw 
a7, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() { +; 
ILP32E-FPELIM-LABEL: caller_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: li a2, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: mv a1, sp +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; 
ILP32E-WITHFP-NEXT: li a2, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a1, sp +; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 
-32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in a register + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) { +; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-NEXT: lw a3, 0(a1) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-NEXT: lw a7, 4(a1) 
+; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a1) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-NEXT: lw a7, 4(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() { +; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 9 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 40 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: li a0, 8 +; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a4, 5 +; ILP32E-FPELIM-NEXT: li a5, 6 +; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; 
ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 9 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 40 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: li a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a4, 5 +; ILP32E-WITHFP-NEXT: li a5, 6 +; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: 
.cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) { +; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls: +; 
ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a2, a1 +; ILP32E-FPELIM-NEXT: mv a1, a0 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call __floatditf +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a2, a1 +; ILP32E-WITHFP-NEXT: mv a1, a0 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call __floatditf +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: 
.cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a2, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __floatditf +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a2, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __floatditf +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) { +; ILP32E-FPELIM-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; 
ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] %a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() { +; ILP32E-FPELIM-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: 
.cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_coerced_struct +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, 
sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_coerced_struct +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a) { +; ILP32E-FPELIM-LABEL: callee_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: 
.cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() { +; ILP32E-FPELIM-LABEL: caller_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -36 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct +; ILP32E-FPELIM-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 36 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-NEXT: sw ra, 36(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; 
ILP32E-WITHFP-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-NEXT: sw a2, -32(s0) +; ILP32E-WITHFP-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-NEXT: call callee_large_struct +; ILP32E-WITHFP-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct: +; ILP32E-WITHFP-SAVE-RESTORE: 
# %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -32(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %ls) + ret i32 %2 +} + +; Check return of 2x xlen structs + +define %struct.small @callee_small_struct_ret() { +; ILP32E-FPELIM-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a0, 1 +; 
ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_struct_ret +; ILP32E-FPELIM-NEXT: 
add a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_struct_ret +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_struct_ret +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_struct_ret +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue 
%struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen scalars + +define fp128 @callee_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 524272 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 524272 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, 
-4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_scalar_ret +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; 
ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalar_ret +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalar_ret +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias 
sret(%struct.large) %agg.result) { +; ILP32E-FPELIM-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a1, 1 +; ILP32E-FPELIM-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-NEXT: li a1, 3 +; ILP32E-FPELIM-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a1, 1 +; ILP32E-WITHFP-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-NEXT: li a1, 3 +; ILP32E-WITHFP-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, 
__riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct_ret +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw 
ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_struct_ret +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct_ret +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct_ret +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret(%struct.large) %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + %6 = add i32 %3, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LP64E-FPELIM %s +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LP64E-WITHFP %s + +; This file contains tests that will have 
differing output for the lp64e ABIs. + +define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_float_in_regs: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: mv s0, a0 +; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a1 +; RV64I-LP64E-FPELIM-NEXT: call __fixsfdi +; RV64I-LP64E-FPELIM-NEXT: add a0, s0, a0 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_float_in_regs: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -24 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 24 +; RV64I-LP64E-WITHFP-NEXT: mv s1, a0 +; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a1 +; RV64I-LP64E-WITHFP-NEXT: call __fixsfdi +; RV64I-LP64E-WITHFP-NEXT: add a0, s1, a0 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 24 +; RV64I-LP64E-WITHFP-NEXT: ret + %b_fptosi = fptosi float %b to i64 + %1 = add i64 %a, %b_fptosi + ret i64 %1 +} + +define i64 @caller_float_in_regs() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_float_in_regs: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: li a0, 1 +; RV64I-LP64E-FPELIM-NEXT: lui a1, 262144 +; RV64I-LP64E-FPELIM-NEXT: call callee_float_in_regs +; 
RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_float_in_regs: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: li a0, 1 +; RV64I-LP64E-WITHFP-NEXT: lui a1, 262144 +; RV64I-LP64E-WITHFP-NEXT: call callee_float_in_regs +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) + ret i64 %1 +} + +define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_float_on_stack: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: lw a0, 16(s0) +; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -16 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_float_on_stack: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: lw a0, 16(s0) +; RV64I-LP64E-WITHFP-NEXT: addi 
sp, s0, -16 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = trunc i128 %d to i64 + %2 = bitcast float %e to i32 + %3 = sext i32 %2 to i64 + %4 = add i64 %1, %3 + ret i64 %3 +} + +define i64 @caller_float_on_stack() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_float_on_stack: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -48 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 48 +; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: lui a0, 264704 +; RV64I-LP64E-FPELIM-NEXT: sd a0, 16(sp) +; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp) +; RV64I-LP64E-FPELIM-NEXT: li a1, 4 +; RV64I-LP64E-FPELIM-NEXT: li a0, 1 +; RV64I-LP64E-FPELIM-NEXT: li a2, 2 +; RV64I-LP64E-FPELIM-NEXT: li a4, 3 +; RV64I-LP64E-FPELIM-NEXT: sd a1, 0(sp) +; RV64I-LP64E-FPELIM-NEXT: li a1, 0 +; RV64I-LP64E-FPELIM-NEXT: li a3, 0 +; RV64I-LP64E-FPELIM-NEXT: li a5, 0 +; RV64I-LP64E-FPELIM-NEXT: call callee_float_on_stack +; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -48 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 48 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_float_on_stack: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -48 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 48 +; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: lui a0, 264704 +; RV64I-LP64E-WITHFP-NEXT: sd a0, 16(sp) +; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp) +; RV64I-LP64E-WITHFP-NEXT: li a1, 4 +; 
RV64I-LP64E-WITHFP-NEXT: li a0, 1 +; RV64I-LP64E-WITHFP-NEXT: li a2, 2 +; RV64I-LP64E-WITHFP-NEXT: li a4, 3 +; RV64I-LP64E-WITHFP-NEXT: sd a1, 0(sp) +; RV64I-LP64E-WITHFP-NEXT: li a1, 0 +; RV64I-LP64E-WITHFP-NEXT: li a3, 0 +; RV64I-LP64E-WITHFP-NEXT: li a5, 0 +; RV64I-LP64E-WITHFP-NEXT: call callee_float_on_stack +; RV64I-LP64E-WITHFP-NEXT: addi sp, s0, -48 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 48 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) + ret i64 %1 +} + +define float @callee_tiny_scalar_ret() nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_tiny_scalar_ret: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: lui a0, 260096 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_tiny_scalar_ret: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: lui a0, 260096 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + ret float 1.0 +} + +; The sign extension of the float return is necessary, as softened floats are +; passed anyext. 
+ +define i64 @caller_tiny_scalar_ret() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_tiny_scalar_ret: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: call callee_tiny_scalar_ret +; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a0 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_tiny_scalar_ret: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: call callee_tiny_scalar_ret +; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a0 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + %3 = sext i32 %2 to i64 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E + +; Exercises the ILP32E calling convention code in the case that f32 is a legal +; type. As well as testing that lowering is correct, these tests also aim to +; check that floating point load/store or integer load/store is chosen +; optimally when floats are passed on the stack. 
+ +define float @onstack_f32_noop(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind { +; RV32IF-ILP32E-LABEL: onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: lw a0, 12(sp) +; RV32IF-ILP32E-NEXT: ret + ret float %f +} + +define float @onstack_f32_fadd(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind { +; RV32IF-ILP32E-LABEL: onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: flw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: flw fa4, 8(sp) +; RV32IF-ILP32E-NEXT: fadd.s fa5, fa4, fa5 +; RV32IF-ILP32E-NEXT: fmv.x.w a0, fa5 +; RV32IF-ILP32E-NEXT: ret + %1 = fadd float %e, %f + ret float %1 +} + +define float @caller_onstack_f32_noop(float %a) nounwind { +; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -20 +; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw a0, 12(sp) +; RV32IF-ILP32E-NEXT: lui a0, 264704 +; RV32IF-ILP32E-NEXT: sw a0, 8(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a1, 4 +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop +; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 20 +; RV32IF-ILP32E-NEXT: ret + %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a) + ret float %1 +} + +define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { +; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -20 +; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: fmv.w.x fa5, a1 +; RV32IF-ILP32E-NEXT: fmv.w.x fa4, a0 +; RV32IF-ILP32E-NEXT: fadd.s fa3, fa4, fa5 +; RV32IF-ILP32E-NEXT: fsub.s fa5, fa5, fa4 +; RV32IF-ILP32E-NEXT: 
sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a0, 4 +; RV32IF-ILP32E-NEXT: sw a0, 0(sp) +; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop +; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 20 +; RV32IF-ILP32E-NEXT: ret + %1 = fadd float %a, %b + %2 = fsub float %b, %a + %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2) + ret float %3 +} diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -6,12 +6,28 @@ ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD ; +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+i -target-abi ilp32e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32I-ILP32E +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E-F +; ; RUN: llc -mtriple riscv64-unknown-elf -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64 ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-F ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD +; +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+i -target-abi lp64e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64I-LP64E +; RUN: llc -mtriple 
riscv64-unknown-elf -mattr=+e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-F +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f,+d -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-FD ; ; Checking for special return instructions (sret, mret). @@ -289,6 +305,183 @@ ; CHECK-RV32-FD-NEXT: addi sp, sp, 320 ; CHECK-RV32-FD-NEXT: mret ; +; CHECK-RV32I-ILP32E-LABEL: foo_with_call: +; CHECK-RV32I-ILP32E: # %bb.0: +; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -104 +; CHECK-RV32I-ILP32E-NEXT: sw ra, 100(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t0, 96(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t1, 92(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t2, 88(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a2, 76(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a4, 68(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a6, 60(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a7, 56(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s10, 20(sp) # 4-byte Folded Spill +; 
CHECK-RV32I-ILP32E-NEXT: sw s11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: call otherfoo +; CHECK-RV32I-ILP32E-NEXT: lw ra, 100(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t0, 96(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t1, 92(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t2, 88(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a1, 80(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a2, 76(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a3, 72(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a4, 68(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a5, 64(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a6, 60(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a7, 56(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; 
CHECK-RV32I-ILP32E-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, 104 +; CHECK-RV32I-ILP32E-NEXT: mret +; +; CHECK-RV32E-LABEL: foo_with_call: +; CHECK-RV32E: # %bb.0: +; CHECK-RV32E-NEXT: addi sp, sp, -40 +; CHECK-RV32E-NEXT: sw ra, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t0, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t1, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t2, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: call otherfoo +; CHECK-RV32E-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t1, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t2, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: addi sp, sp, 40 +; CHECK-RV32E-NEXT: mret +; +; CHECK-RV32E-F-LABEL: foo_with_call: +; CHECK-RV32E-F: # %bb.0: +; CHECK-RV32E-F-NEXT: addi sp, sp, -168 +; CHECK-RV32E-F-NEXT: sw ra, 164(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t0, 160(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t1, 156(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t2, 152(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 
4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; 
CHECK-RV32E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: call otherfoo +; CHECK-RV32E-F-NEXT: lw ra, 164(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t0, 160(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t1, 156(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t2, 152(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs2, 52(sp) # 4-byte 
Folded Reload +; CHECK-RV32E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: addi sp, sp, 168 +; CHECK-RV32E-F-NEXT: mret +; ; CHECK-RV64-LABEL: foo_with_call: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: addi sp, sp, -128 @@ -533,6 +726,306 @@ ; CHECK-RV64-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: addi sp, sp, 384 ; CHECK-RV64-FD-NEXT: mret +; +; CHECK-RV64I-LP64E-LABEL: foo_with_call: +; CHECK-RV64I-LP64E: # %bb.0: +; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -208 +; CHECK-RV64I-LP64E-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t2, 176(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a7, 
112(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s3, 96(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s4, 88(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s10, 40(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t3, 24(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t4, 16(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t5, 8(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t6, 0(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: call otherfoo +; CHECK-RV64I-LP64E-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; 
CHECK-RV64I-LP64E-NEXT: ld s6, 72(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s8, 56(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t3, 24(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t4, 16(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t5, 8(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t6, 0(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: addi sp, sp, 208 +; CHECK-RV64I-LP64E-NEXT: mret +; +; CHECK-RV64E-LABEL: foo_with_call: +; CHECK-RV64E: # %bb.0: +; CHECK-RV64E-NEXT: addi sp, sp, -80 +; CHECK-RV64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t0, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t1, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t2, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: call otherfoo +; CHECK-RV64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t0, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t1, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t2, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload +; 
CHECK-RV64E-NEXT: addi sp, sp, 80 +; CHECK-RV64E-NEXT: mret +; +; CHECK-RV64E-F-LABEL: foo_with_call: +; CHECK-RV64E-F: # %bb.0: +; CHECK-RV64E-F-NEXT: addi sp, sp, -208 +; CHECK-RV64E-F-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t2, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; 
CHECK-RV64E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: call otherfoo +; CHECK-RV64E-F-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded 
Reload +; CHECK-RV64E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: addi sp, sp, 208 +; CHECK-RV64E-F-NEXT: mret +; +; CHECK-RV64E-FD-LABEL: foo_with_call: +; CHECK-RV64E-FD: # %bb.0: +; CHECK-RV64E-FD-NEXT: addi sp, sp, -464 +; CHECK-RV64E-FD-NEXT: sd ra, 456(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t0, 448(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t1, 440(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t2, 432(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a0, 424(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a1, 416(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a2, 408(sp) # 8-byte Folded Spill +; 
CHECK-RV64E-FD-NEXT: sd a3, 400(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a4, 392(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a5, 384(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a6, 376(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a7, 368(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s2, 360(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s3, 352(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s4, 344(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s5, 336(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s6, 328(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s7, 320(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s8, 312(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s9, 304(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s10, 296(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s11, 288(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t3, 280(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t4, 272(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t5, 264(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; 
CHECK-RV64E-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: call otherfoo +; CHECK-RV64E-FD-NEXT: ld ra, 456(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t0, 448(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t1, 440(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t2, 432(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a0, 424(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a1, 416(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a2, 408(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a3, 400(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a4, 392(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a5, 384(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a6, 376(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s2, 
360(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s3, 352(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s4, 344(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s5, 336(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s6, 328(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s7, 320(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s8, 312(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s9, 304(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s10, 296(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s11, 288(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t3, 280(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t4, 272(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t5, 264(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t6, 256(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; 
CHECK-RV64E-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: addi sp, sp, 464 +; CHECK-RV64E-FD-NEXT: mret %call = call i32 @otherfoo() ret void } @@ -796,6 +1289,192 @@ ; CHECK-RV32-FD-NEXT: addi sp, sp, 336 ; CHECK-RV32-FD-NEXT: mret ; +; CHECK-RV32I-ILP32E-LABEL: foo_fp_with_call: +; CHECK-RV32I-ILP32E: # %bb.0: +; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -108 +; CHECK-RV32I-ILP32E-NEXT: sw ra, 104(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t0, 100(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t1, 96(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t2, 92(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s0, 88(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a2, 76(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a4, 68(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw a6, 60(sp) # 4-byte Folded Spill +; 
CHECK-RV32I-ILP32E-NEXT: sw a7, 56(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s10, 20(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw s11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; CHECK-RV32I-ILP32E-NEXT: addi s0, sp, 108 +; CHECK-RV32I-ILP32E-NEXT: call otherfoo +; CHECK-RV32I-ILP32E-NEXT: lw ra, 104(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t0, 100(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t1, 96(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t2, 92(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s0, 88(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a1, 80(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a2, 76(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a3, 72(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a4, 68(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a5, 64(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a6, 60(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw a7, 56(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s3, 48(sp) # 
4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, 108 +; CHECK-RV32I-ILP32E-NEXT: mret +; +; CHECK-RV32E-LABEL: foo_fp_with_call: +; CHECK-RV32E: # %bb.0: +; CHECK-RV32E-NEXT: addi sp, sp, -44 +; CHECK-RV32E-NEXT: sw ra, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t0, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t1, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t2, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: addi s0, sp, 44 +; CHECK-RV32E-NEXT: call otherfoo +; CHECK-RV32E-NEXT: lw ra, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t1, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t2, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a0, 
20(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: addi sp, sp, 44 +; CHECK-RV32E-NEXT: mret +; +; CHECK-RV32E-F-LABEL: foo_fp_with_call: +; CHECK-RV32E-F: # %bb.0: +; CHECK-RV32E-F-NEXT: addi sp, sp, -172 +; CHECK-RV32E-F-NEXT: sw ra, 168(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t0, 164(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t1, 160(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t2, 156(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw s0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa3, 72(sp) # 
4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: addi s0, sp, 172 +; CHECK-RV32E-F-NEXT: call otherfoo +; CHECK-RV32E-F-NEXT: lw ra, 168(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t0, 164(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t1, 160(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t2, 156(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw s0, 152(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; 
CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: addi sp, sp, 172 +; CHECK-RV32E-F-NEXT: mret +; ; CHECK-RV64-LABEL: foo_fp_with_call: ; CHECK-RV64: # %bb.0: ; 
CHECK-RV64-NEXT: addi sp, sp, -144 @@ -1049,6 +1728,318 @@ ; CHECK-RV64-FD-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: addi sp, sp, 400 ; CHECK-RV64-FD-NEXT: mret +; +; CHECK-RV64I-LP64E-LABEL: foo_fp_with_call: +; CHECK-RV64I-LP64E: # %bb.0: +; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -216 +; CHECK-RV64I-LP64E-NEXT: sd ra, 208(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t0, 200(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t1, 192(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd a7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s3, 96(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s4, 88(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s10, 40(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd s11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t3, 24(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t4, 16(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t5, 8(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: sd t6, 
0(sp) # 8-byte Folded Spill +; CHECK-RV64I-LP64E-NEXT: addi s0, sp, 216 +; CHECK-RV64I-LP64E-NEXT: call otherfoo +; CHECK-RV64I-LP64E-NEXT: ld ra, 208(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t0, 200(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t1, 192(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld a7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s6, 72(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s8, 56(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld s11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t3, 24(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t4, 16(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t5, 8(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: ld t6, 0(sp) # 8-byte Folded Reload +; CHECK-RV64I-LP64E-NEXT: addi sp, sp, 216 +; CHECK-RV64I-LP64E-NEXT: mret +; +; CHECK-RV64E-LABEL: foo_fp_with_call: +; CHECK-RV64E: # %bb.0: 
+; CHECK-RV64E-NEXT: addi sp, sp, -88 +; CHECK-RV64E-NEXT: sd ra, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t0, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t1, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t2, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: addi s0, sp, 88 +; CHECK-RV64E-NEXT: call otherfoo +; CHECK-RV64E-NEXT: ld ra, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t0, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t1, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t2, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: addi sp, sp, 88 +; CHECK-RV64E-NEXT: mret +; +; CHECK-RV64E-F-LABEL: foo_fp_with_call: +; CHECK-RV64E-F: # %bb.0: +; CHECK-RV64E-F-NEXT: addi sp, sp, -216 +; CHECK-RV64E-F-NEXT: sd ra, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t0, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t1, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a2, 
152(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; 
CHECK-RV64E-F-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: addi s0, sp, 216 +; CHECK-RV64E-F-NEXT: call otherfoo +; CHECK-RV64E-F-NEXT: ld ra, 208(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t0, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t1, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; 
CHECK-RV64E-F-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: addi sp, sp, 216 +; CHECK-RV64E-F-NEXT: mret +; +; CHECK-RV64E-FD-LABEL: foo_fp_with_call: +; CHECK-RV64E-FD: # %bb.0: +; CHECK-RV64E-FD-NEXT: addi sp, sp, -472 +; CHECK-RV64E-FD-NEXT: sd ra, 464(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t0, 456(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t1, 448(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t2, 440(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a0, 424(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a1, 416(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a2, 408(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a3, 400(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a4, 392(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a5, 384(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a6, 376(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a7, 368(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s2, 360(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s3, 
352(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s4, 344(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s5, 336(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s6, 328(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s7, 320(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s8, 312(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s9, 304(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s10, 296(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s11, 288(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t3, 280(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t4, 272(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t5, 264(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs3, 
96(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: addi s0, sp, 472 +; CHECK-RV64E-FD-NEXT: call otherfoo +; CHECK-RV64E-FD-NEXT: ld ra, 464(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t0, 456(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t1, 448(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t2, 440(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a0, 424(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a1, 416(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a2, 408(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a3, 400(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a4, 392(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a5, 384(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a6, 376(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s2, 360(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s3, 352(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s4, 344(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s5, 336(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s6, 328(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld 
s7, 320(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s8, 312(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s9, 304(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s10, 296(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s11, 288(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t3, 280(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t4, 272(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t5, 264(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t6, 256(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs6, 72(sp) # 8-byte Folded 
Reload +; CHECK-RV64E-FD-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: addi sp, sp, 472 +; CHECK-RV64E-FD-NEXT: mret %call = call i32 @otherfoo() ret void } diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32e.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; TODO: Add more tests. + +define i32 @exhausted(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { +; CHECK-LABEL: exhausted: +; CHECK: # %bb.0: +; CHECK-NEXT: lw t0, 0(sp) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, t0, a0 +; CHECK-NEXT: ret + %1 = add i32 %a, %b + %2 = add i32 %c, %1 + %3 = add i32 %d, %2 + %4 = add i32 %e, %3 + %5 = add i32 %f, %4 + %6 = add i32 %g, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/rv64e.ll b/llvm/test/CodeGen/RISCV/rv64e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64e.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; TODO: Add more tests. 
+ +define i64 @exhausted(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) { +; CHECK-LABEL: exhausted: +; CHECK: # %bb.0: +; CHECK-NEXT: ld t0, 0(sp) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, t0, a0 +; CHECK-NEXT: ret + %1 = add i64 %a, %b + %2 = add i64 %c, %1 + %3 = add i64 %d, %2 + %4 = add i64 %e, %3 + %5 = add i64 %f, %4 + %6 = add i64 %g, %5 + ret i64 %6 +} diff --git a/llvm/test/CodeGen/RISCV/rve.ll b/llvm/test/CodeGen/RISCV/rve.ll deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rve.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 | FileCheck %s - -; CHECK: LLVM ERROR: Codegen not yet implemented for RVE - -define void @nothing() nounwind { - ret void -} diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-LP64E declare void @callee(ptr, ptr) @@ -34,6 +38,33 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; 
RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv s1, sp +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: mv a1, s1 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -62,6 +93,35 @@ ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -64 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64 +; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: .cfi_offset s1, -24 +; RV64I-LP64E-NEXT: addi s0, sp, 64 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -64 +; RV64I-LP64E-NEXT: mv s1, sp +; RV64I-LP64E-NEXT: slli a0, a0, 32 +; RV64I-LP64E-NEXT: srli a0, a0, 32 +; RV64I-LP64E-NEXT: addi a0, a0, 7 +; RV64I-LP64E-NEXT: andi a0, a0, -8 +; RV64I-LP64E-NEXT: sub a0, sp, a0 +; RV64I-LP64E-NEXT: mv sp, a0 +; RV64I-LP64E-NEXT: mv a1, s1 +; 
RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -64 +; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 64 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, i32 %n %2 = alloca i32, align 64 call void @callee(ptr %1, ptr %2) diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -1,11 +1,135 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-LP64E declare void @callee(ptr) +define void @caller16() { +; RV32I-LABEL: caller16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 16 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi 
sp, sp, -16 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 16 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller16: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -32 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32 +; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 32 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -16 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -32 +; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 32 +; RV64I-LP64E-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign16() "no-realign-stack" { +; RV32I-LABEL: caller_no_realign16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller_no_realign16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; 
RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller_no_realign16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign16: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + define void @caller32() { ; RV32I-LABEL: caller32: ; RV32I: # %bb.0: @@ -26,6 +150,25 @@ ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32I-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -32 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -32 +; RV32I-ILP32E-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 24(sp) # 
4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 32 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -44,6 +187,25 @@ ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller32: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -32 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32 +; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 32 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -32 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -32 +; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 32 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 32 call void @callee(ptr %1) ret void @@ -62,6 +224,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -73,6 +247,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign32: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 32 call void @callee(ptr %1) ret void @@ -98,6 +284,25 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -116,6 +321,25 @@ ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller64: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -64 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64 +; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 64 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -64 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -64 +; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 64 
+; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 64 call void @callee(ptr %1) ret void @@ -134,6 +358,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -145,6 +381,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign64: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 64 call void @callee(ptr %1) ret void @@ -170,6 +418,25 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -128 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; 
RV32I-ILP32E-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 @@ -188,6 +455,25 @@ ; RV64I-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 128 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller128: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -128 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 128 +; RV64I-LP64E-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 128 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -128 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -128 +; RV64I-LP64E-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 128 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 128 call void @callee(ptr %1) ret void @@ -206,6 +492,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -217,6 +515,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign128: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 128 call void @callee(ptr %1) ret void @@ -242,6 +552,25 @@ ; RV32I-NEXT: addi sp, sp, 256 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 256 +; RV32I-ILP32E-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -256 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -256 +; RV32I-ILP32E-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 256 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -256 @@ -260,6 +589,25 @@ ; RV64I-NEXT: ld s0, 240(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 256 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller256: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -256 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 256 +; RV64I-LP64E-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 256 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -256 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -256 +; RV64I-LP64E-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; 
RV64I-LP64E-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 256 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 256 call void @callee(ptr %1) ret void @@ -278,6 +626,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -289,6 +649,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign256: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 256 call void @callee(ptr %1) ret void @@ -314,6 +686,25 @@ ; RV32I-NEXT: addi sp, sp, 1024 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 1024 +; RV32I-ILP32E-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -512 +; RV32I-ILP32E-NEXT: addi a0, sp, 512 +; RV32I-ILP32E-NEXT: 
call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -1024 +; RV32I-ILP32E-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 1024 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -1024 @@ -332,6 +723,25 @@ ; RV64I-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 1024 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller512: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -1024 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 1024 +; RV64I-LP64E-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 1024 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -512 +; RV64I-LP64E-NEXT: addi a0, sp, 512 +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -1024 +; RV64I-LP64E-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 1024 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 512 call void @callee(ptr %1) ret void @@ -350,6 +760,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -361,6 +783,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; 
RV64I-LP64E-LABEL: caller_no_realign512: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 512 call void @callee(ptr %1) ret void @@ -388,6 +822,27 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: andi sp, sp, -1024 +; RV32I-ILP32E-NEXT: addi a0, sp, 1024 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: addi sp, s0, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -408,6 +863,27 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller1024: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 
2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: addi sp, sp, -8 +; RV64I-LP64E-NEXT: andi sp, sp, -1024 +; RV64I-LP64E-NEXT: addi a0, sp, 1024 +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: addi sp, s0, -2048 +; RV64I-LP64E-NEXT: addi sp, sp, 8 +; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 1024 call void @callee(ptr %1) ret void @@ -426,6 +902,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -437,6 +925,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign1024: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 1024 call void @callee(ptr %1) ret void @@ -468,6 +968,31 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte 
Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: andi sp, sp, -2048 +; RV32I-ILP32E-NEXT: addi a0, sp, 2047 +; RV32I-ILP32E-NEXT: addi a0, a0, 1 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -492,6 +1017,31 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller2048: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: addi sp, sp, -2048 +; RV64I-LP64E-NEXT: addi sp, sp, -8 +; RV64I-LP64E-NEXT: andi sp, sp, -2048 +; RV64I-LP64E-NEXT: addi a0, sp, 2047 +; RV64I-LP64E-NEXT: addi a0, a0, 1 +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: lui a0, 1 +; RV64I-LP64E-NEXT: sub sp, s0, a0 +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca 
i8, align 2048 call void @callee(ptr %1) ret void @@ -510,6 +1060,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -521,6 +1083,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign2048: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 2048 call void @callee(ptr %1) ret void @@ -554,6 +1128,33 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: srli a0, sp, 12 +; RV32I-ILP32E-NEXT: slli sp, a0, 12 +; RV32I-ILP32E-NEXT: lui a0, 1 +; 
RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -580,6 +1181,33 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller4096: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: lui a0, 2 +; RV64I-LP64E-NEXT: addiw a0, a0, -2040 +; RV64I-LP64E-NEXT: sub sp, sp, a0 +; RV64I-LP64E-NEXT: srli a0, sp, 12 +; RV64I-LP64E-NEXT: slli sp, a0, 12 +; RV64I-LP64E-NEXT: lui a0, 1 +; RV64I-LP64E-NEXT: add a0, sp, a0 +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: lui a0, 2 +; RV64I-LP64E-NEXT: sub sp, s0, a0 +; RV64I-LP64E-NEXT: addiw a0, a0, -2040 +; RV64I-LP64E-NEXT: add sp, sp, a0 +; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 4096 call void @callee(ptr %1) ret void @@ -598,6 +1226,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; 
RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -609,6 +1249,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign4096: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 4096 call void @callee(ptr %1) ret void diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll --- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll +++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \ @@ -10,6 +12,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -target-abi lp64 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv64 -target-abi lp64e < %s \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64 
< %s \ @@ -33,8 +37,3 @@ ; CHECK-IMP-NEXT: ret ret void } - -; RUN: not --crash llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s - -; CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @abort() + +define i32 @caller(i32 %a) { +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -8 +; ILP32E-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -4 +; ILP32E-NEXT: .cfi_offset s0, -8 +; ILP32E-NEXT: mv s0, a0 +; ILP32E-NEXT: li a0, 1 +; ILP32E-NEXT: lui a2, 262144 +; ILP32E-NEXT: li a1, 0 +; ILP32E-NEXT: call va_double +; ILP32E-NEXT: mv a0, s0 +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 8 +; ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; 
ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va_double +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +entry: + call void (i32, ...) @va_double(i32 1, double 2.000000e+00) + ret i32 %a +} + +define void @va_double(i32 %n, ...) { +; ILP32E-LABEL: va_double: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -32 +; ILP32E-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -28 +; ILP32E-NEXT: sw a5, 28(sp) +; ILP32E-NEXT: sw a4, 24(sp) +; ILP32E-NEXT: sw a3, 20(sp) +; ILP32E-NEXT: sw a2, 16(sp) +; ILP32E-NEXT: sw a1, 12(sp) +; ILP32E-NEXT: addi a0, sp, 12 +; ILP32E-NEXT: sw a0, 0(sp) +; ILP32E-NEXT: addi a0, sp, 19 +; ILP32E-NEXT: andi a1, a0, -8 +; ILP32E-NEXT: addi a0, a1, 8 +; ILP32E-NEXT: sw a0, 0(sp) +; ILP32E-NEXT: lw a0, 0(a1) +; ILP32E-NEXT: lw a1, 4(a1) +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: li a2, 0 +; ILP32E-NEXT: call __eqdf2 +; ILP32E-NEXT: bnez a0, .LBB1_2 +; ILP32E-NEXT: # %bb.1: # %if.end +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 32 +; ILP32E-NEXT: ret +; ILP32E-NEXT: .LBB1_2: # %if.then +; ILP32E-NEXT: call abort +; +; ILP32E-WITHFP-LABEL: va_double: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; 
ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 4 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: lw a1, 4(a1) +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call __eqdf2 +; ILP32E-WITHFP-NEXT: bnez a0, .LBB1_2 +; ILP32E-WITHFP-NEXT: # %bb.1: # %if.end +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ILP32E-WITHFP-NEXT: .LBB1_2: # %if.then +; ILP32E-WITHFP-NEXT: call abort +entry: + %args = alloca i8*, align 4 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %argp.cur = load i8*, i8** %args, align 4 + %0 = ptrtoint i8* %argp.cur to i32 + %1 = add i32 %0, 7 + %2 = and i32 %1, -8 + %argp.cur.aligned = inttoptr i32 %2 to i8* + %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8 + store i8* %argp.next, i8** %args, align 4 + %3 = bitcast i8* %argp.cur.aligned to double* + %4 = load double, double* %3, align 8 + %cmp = fcmp une double %4, 2.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + call void @abort() + unreachable + +if.end: + %args2 = bitcast i8** %args to i8* + call void @llvm.va_end(i8* %args2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -11,6 +11,12 @@ ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d \ ; RUN: -verify-machineinstrs \ ; RUN: | 
FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -target-abi ilp32e \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \ @@ -21,6 +27,12 @@ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-WITHFP %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefix=LP64E-FPELIM %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e -frame-pointer=all \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefix=LP64E-WITHFP %s ; The same vararg calling convention is used for ilp32/ilp32f/ilp32d and for ; lp64/lp64f/lp64d. 
Different CHECK lines are required for RV32D due to slight @@ -97,6 +109,44 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 16 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -138,6 +188,44 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; 
LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 28 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lw a0, 24(sp) +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load ptr, ptr %va, align 4 @@ -202,6 +290,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 16 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: 
va1_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -238,6 +359,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; 
LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -338,6 +492,62 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_va_arg_alloca: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: addi s0, sp, 16 +; ILP32E-FPELIM-NEXT: mv s1, a1 +; ILP32E-FPELIM-NEXT: sw a5, 20(s0) +; ILP32E-FPELIM-NEXT: sw a4, 16(s0) +; ILP32E-FPELIM-NEXT: sw a3, 12(s0) +; ILP32E-FPELIM-NEXT: sw a2, 8(s0) +; ILP32E-FPELIM-NEXT: sw a1, 4(s0) +; ILP32E-FPELIM-NEXT: addi a0, s0, 8 +; ILP32E-FPELIM-NEXT: sw a0, -16(s0) +; ILP32E-FPELIM-NEXT: addi a0, a1, 3 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: sub a0, sp, a0 +; ILP32E-FPELIM-NEXT: mv sp, a0 +; ILP32E-FPELIM-NEXT: call notdead +; ILP32E-FPELIM-NEXT: mv a0, s1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -16 +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 40 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg_alloca: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 
12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a1, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: sub a0, sp, a0 +; ILP32E-WITHFP-NEXT: mv sp, a0 +; ILP32E-WITHFP-NEXT: call notdead +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -401,6 +611,66 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg_alloca: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: addi s0, sp, 32 +; LP64E-FPELIM-NEXT: mv s1, a1 +; LP64E-FPELIM-NEXT: sd a5, 40(s0) +; LP64E-FPELIM-NEXT: sd a4, 32(s0) +; LP64E-FPELIM-NEXT: sd a3, 24(s0) +; LP64E-FPELIM-NEXT: sd a2, 16(s0) +; LP64E-FPELIM-NEXT: sd a1, 8(s0) +; LP64E-FPELIM-NEXT: addi a0, s0, 16 +; LP64E-FPELIM-NEXT: sd a0, -32(s0) +; LP64E-FPELIM-NEXT: slli a0, a1, 32 +; LP64E-FPELIM-NEXT: srli a0, a0, 32 +; LP64E-FPELIM-NEXT: addi a0, a0, 7 +; LP64E-FPELIM-NEXT: andi a0, a0, -8 +; LP64E-FPELIM-NEXT: sub a0, sp, a0 +; LP64E-FPELIM-NEXT: mv sp, a0 +; LP64E-FPELIM-NEXT: call notdead +; LP64E-FPELIM-NEXT: mv a0, s1 +; LP64E-FPELIM-NEXT: addi sp, s0, -32 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 
8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg_alloca: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd a0, -32(s0) +; LP64E-WITHFP-NEXT: slli a0, a1, 32 +; LP64E-WITHFP-NEXT: srli a0, a0, 32 +; LP64E-WITHFP-NEXT: addi a0, a0, 7 +; LP64E-WITHFP-NEXT: andi a0, a0, -8 +; LP64E-WITHFP-NEXT: sub a0, sp, a0 +; LP64E-WITHFP-NEXT: mv sp, a0 +; LP64E-WITHFP-NEXT: call notdead +; LP64E-WITHFP-NEXT: mv a0, s1 +; LP64E-WITHFP-NEXT: addi sp, s0, -32 +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -451,6 +721,33 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: lui a2, 261888 +; ILP32E-FPELIM-NEXT: li a3, 2 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call va1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_caller: +; ILP32E-WITHFP: # 
%bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a2, 261888 +; ILP32E-WITHFP-NEXT: li a3, 2 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va1 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -477,6 +774,33 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: li a2, 2 +; LP64E-FPELIM-NEXT: call va1 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: li a2, 2 +; LP64E-WITHFP-NEXT: call va1 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i32 (ptr, ...) 
@va1(ptr undef, double 1.0, i32 2) ret void } @@ -553,6 +877,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 12 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 19 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a1, sp, 27 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(a0) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 4 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a1, s0, 19 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(a0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -589,6 +956,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi 
sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 39 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 23 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load ptr, ptr %va @@ -674,6 +1074,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 19 +; ILP32E-FPELIM-NEXT: andi a1, a0, -8 +; ILP32E-FPELIM-NEXT: addi a0, a1, 4 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: lw a0, 0(a1) +; ILP32E-FPELIM-NEXT: addi a2, a1, 8 +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 
4(a1) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 4 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: addi a2, a1, 8 +; ILP32E-WITHFP-NEXT: sw a2, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(a1) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -710,6 +1153,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; 
LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, double @@ -755,6 +1231,31 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: lui a2, 261888 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call va2 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a2, 261888 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va2 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -779,6 +1280,31 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_caller: +; LP64E-FPELIM: 
# %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: call va2 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: call va2 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i64 (ptr, ...) @va2(ptr undef, double 1.000000e+00) ret void } @@ -861,6 +1387,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 20 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 27 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a3, sp, 35 +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: add a2, a2, a3 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32E-FPELIM-NEXT: add a1, a2, a1 +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte 
Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 4 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, s0, 19 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -895,6 +1468,37 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a3, sp, 31 +; LP64E-FPELIM-NEXT: add a0, a1, a2 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: addi a3, s0, 15 +; LP64E-WITHFP-NEXT: add a0, a1, a2 +; 
LP64E-WITHFP-NEXT: sd a3, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load ptr, ptr %va @@ -987,6 +1591,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 27 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a3, a0, 4 +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-NEXT: addi a4, a0, 8 +; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a3 +; ILP32E-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32E-FPELIM-NEXT: add a2, a2, a4 +; ILP32E-FPELIM-NEXT: add a1, a2, a1 +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: addi a4, a0, 8 +; ILP32E-WITHFP-NEXT: sw a4, -12(s0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a4 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; 
ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1021,6 +1672,37 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a3, sp, 24 +; LP64E-FPELIM-NEXT: add a0, a1, a2 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: addi a3, s0, 8 +; LP64E-WITHFP-NEXT: add a0, a1, a2 +; LP64E-WITHFP-NEXT: sd a3, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, double @@ -1076,6 +1758,37 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: li a0, 
2 +; ILP32E-FPELIM-NEXT: li a1, 1111 +; ILP32E-FPELIM-NEXT: lui a4, 262144 +; ILP32E-FPELIM-NEXT: li a2, 0 +; ILP32E-FPELIM-NEXT: li a3, 0 +; ILP32E-FPELIM-NEXT: call va3 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: li a0, 2 +; ILP32E-WITHFP-NEXT: li a1, 1111 +; ILP32E-WITHFP-NEXT: lui a4, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: li a3, 0 +; ILP32E-WITHFP-NEXT: call va3 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -1104,6 +1817,35 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a2, 1 +; LP64E-FPELIM-NEXT: slli a2, a2, 62 +; LP64E-FPELIM-NEXT: li a0, 2 +; LP64E-FPELIM-NEXT: li a1, 1111 +; LP64E-FPELIM-NEXT: call va3 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a2, 1 +; LP64E-WITHFP-NEXT: 
slli a2, a2, 62 +; LP64E-WITHFP-NEXT: li a0, 2 +; LP64E-WITHFP-NEXT: li a1, 1111 +; LP64E-WITHFP-NEXT: call va3 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i64 (i32, i64, ...) @va3(i32 2, i64 1111, double 2.000000e+00) ret void } @@ -1237,6 +1979,87 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va4_va_copy: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: mv s0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 36(sp) +; ILP32E-FPELIM-NEXT: sw a4, 32(sp) +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: call notdead +; ILP32E-FPELIM-NEXT: lw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, a0, 3 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a1, a0, 4 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: addi a0, a0, 7 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a2, a0, 4 +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-NEXT: addi a0, a0, 7 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a3, a0, 4 +; ILP32E-FPELIM-NEXT: sw a3, 4(sp) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: add a1, a1, s0 +; ILP32E-FPELIM-NEXT: add a1, a1, a2 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 40 +; ILP32E-FPELIM-NEXT: ret +; +; 
ILP32E-WITHFP-LABEL: va4_va_copy: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -44 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: sw a0, -20(s0) +; ILP32E-WITHFP-NEXT: call notdead +; ILP32E-WITHFP-NEXT: lw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -16(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a2, a0, 4 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -16(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a1, a1, s1 +; ILP32E-WITHFP-NEXT: add a1, a1, a2 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 44 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -1321,6 +2144,87 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 112 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va4_va_copy: +; 
LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: mv s0, a1 +; LP64E-FPELIM-NEXT: sd a5, 72(sp) +; LP64E-FPELIM-NEXT: sd a4, 64(sp) +; LP64E-FPELIM-NEXT: sd a3, 56(sp) +; LP64E-FPELIM-NEXT: sd a2, 48(sp) +; LP64E-FPELIM-NEXT: sd a1, 40(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 48 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: sd a0, 0(sp) +; LP64E-FPELIM-NEXT: call notdead +; LP64E-FPELIM-NEXT: ld a0, 8(sp) +; LP64E-FPELIM-NEXT: addi a0, a0, 3 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a1, a0, 8 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: ld a1, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a2, a0, 8 +; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: ld a2, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a3, a0, 8 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: ld a0, 0(a0) +; LP64E-FPELIM-NEXT: add a1, a1, s0 +; LP64E-FPELIM-NEXT: add a1, a1, a2 +; LP64E-FPELIM-NEXT: addw a0, a1, a0 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va4_va_copy: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -88 +; LP64E-WITHFP-NEXT: sd ra, 32(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 40 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd 
a0, -32(s0) +; LP64E-WITHFP-NEXT: sd a0, -40(s0) +; LP64E-WITHFP-NEXT: call notdead +; LP64E-WITHFP-NEXT: ld a0, -32(s0) +; LP64E-WITHFP-NEXT: addi a0, a0, 3 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a1, a0, 8 +; LP64E-WITHFP-NEXT: sd a1, -32(s0) +; LP64E-WITHFP-NEXT: ld a1, 0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a2, a0, 8 +; LP64E-WITHFP-NEXT: sd a2, -32(s0) +; LP64E-WITHFP-NEXT: ld a2, 0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a3, a0, 8 +; LP64E-WITHFP-NEXT: sd a3, -32(s0) +; LP64E-WITHFP-NEXT: ld a0, 0(a0) +; LP64E-WITHFP-NEXT: add a1, a1, s1 +; LP64E-WITHFP-NEXT: add a1, a1, a2 +; LP64E-WITHFP-NEXT: addw a0, a1, a0 +; LP64E-WITHFP-NEXT: ld ra, 32(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s1, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 88 +; LP64E-WITHFP-NEXT: ret %vargs = alloca ptr %wargs = alloca ptr call void @llvm.va_start(ptr %vargs) @@ -1478,6 +2382,104 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va5_aligned_stack_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: li a0, 16 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: li a0, 15 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; 
ILP32E-FPELIM-NEXT: li a0, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-NEXT: li a3, 12 +; ILP32E-FPELIM-NEXT: li a4, 13 +; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; 
ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 13 +; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 @@ -1540,6 +2542,73 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 48 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va5_aligned_stack_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -56 +; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a0, 17 +; LP64E-FPELIM-NEXT: sd a0, 40(sp) +; LP64E-FPELIM-NEXT: li a0, 16 +; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-FPELIM-NEXT: sd a0, 32(sp) +; LP64E-FPELIM-NEXT: li a0, 15 +; LP64E-FPELIM-NEXT: sd a0, 24(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: li a0, 14 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 2384 +; LP64E-FPELIM-NEXT: addiw a0, a0, 761 
+; LP64E-FPELIM-NEXT: slli a6, a0, 11 +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-FPELIM-NEXT: li a0, 1 +; LP64E-FPELIM-NEXT: li a1, 11 +; LP64E-FPELIM-NEXT: li a4, 12 +; LP64E-FPELIM-NEXT: li a5, 13 +; LP64E-FPELIM-NEXT: sd a6, 0(sp) +; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee +; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 56 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va5_aligned_stack_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 64 +; LP64E-WITHFP-NEXT: li a0, 17 +; LP64E-WITHFP-NEXT: sd a0, 40(sp) +; LP64E-WITHFP-NEXT: li a0, 16 +; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-WITHFP-NEXT: sd a0, 32(sp) +; LP64E-WITHFP-NEXT: li a0, 15 +; LP64E-WITHFP-NEXT: sd a0, 24(sp) +; LP64E-WITHFP-NEXT: sd a1, 16(sp) +; LP64E-WITHFP-NEXT: li a0, 14 +; LP64E-WITHFP-NEXT: sd a0, 8(sp) +; LP64E-WITHFP-NEXT: lui a0, 2384 +; LP64E-WITHFP-NEXT: addiw a0, a0, 761 +; LP64E-WITHFP-NEXT: slli a6, a0, 11 +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-WITHFP-NEXT: li a0, 1 +; LP64E-WITHFP-NEXT: li a1, 11 +; LP64E-WITHFP-NEXT: li a4, 12 +; LP64E-WITHFP-NEXT: li a5, 13 +; LP64E-WITHFP-NEXT: sd a6, 0(sp) +; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee +; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %1 = call i32 (i32, ...) 
@va5_aligned_stack_callee(i32 1, i32 11, fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, i64 20000000000, i32 14, double 2.720000e+00, i32 15, [2 x i32] [i32 16, i32 17]) @@ -1604,6 +2673,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va6_no_fixed_args: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 12 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va6_no_fixed_args: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -1640,6 +2742,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va6_no_fixed_args: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; 
LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: sd a0, 16(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 24 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va6_no_fixed_args: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a0, 0(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 8 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -1757,6 +2892,68 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add sp, sp, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va_large_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 24414 +; ILP32E-FPELIM-NEXT: addi a0, a0, 288 +; ILP32E-FPELIM-NEXT: sub sp, sp, a0 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 100000032 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: lui a6, 24414 +; ILP32E-FPELIM-NEXT: add a6, sp, a6 +; ILP32E-FPELIM-NEXT: sw a5, 284(a6) +; ILP32E-FPELIM-NEXT: lui a5, 24414 +; ILP32E-FPELIM-NEXT: add a5, sp, a5 +; ILP32E-FPELIM-NEXT: sw a4, 280(a5) +; ILP32E-FPELIM-NEXT: lui a4, 24414 +; ILP32E-FPELIM-NEXT: add a4, sp, a4 +; ILP32E-FPELIM-NEXT: sw a3, 276(a4) +; ILP32E-FPELIM-NEXT: lui a3, 24414 +; ILP32E-FPELIM-NEXT: add a3, sp, a3 +; ILP32E-FPELIM-NEXT: sw a2, 272(a3) +; ILP32E-FPELIM-NEXT: lui 
a2, 24414 +; ILP32E-FPELIM-NEXT: add a2, sp, a2 +; ILP32E-FPELIM-NEXT: sw a1, 268(a2) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: addi a1, a1, 272 +; ILP32E-FPELIM-NEXT: add a1, sp, a1 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: addi a1, a1, 288 +; ILP32E-FPELIM-NEXT: add sp, sp, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va_large_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -2044 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044 +; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 2020 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: lui a0, 24414 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1740 +; ILP32E-WITHFP-NEXT: sub sp, sp, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: lui a2, 24414 +; ILP32E-WITHFP-NEXT: sub a2, s0, a2 +; ILP32E-WITHFP-NEXT: sw a1, -272(a2) +; ILP32E-WITHFP-NEXT: lui a1, 24414 +; ILP32E-WITHFP-NEXT: addi a1, a1, -1740 +; ILP32E-WITHFP-NEXT: add sp, sp, a1 +; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 2044 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 @@ -1828,6 +3025,70 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 1952(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 2032 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va_large_stack: +; LP64E-FPELIM: # 
%bb.0: +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 320 +; LP64E-FPELIM-NEXT: sub sp, sp, a0 +; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064 +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a1, 280(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 284 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: lw a0, 280(a0) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: add a1, sp, a1 +; LP64E-FPELIM-NEXT: sd a5, 312(a1) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: add a1, sp, a1 +; LP64E-FPELIM-NEXT: sd a4, 304(a1) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: add a1, sp, a1 +; LP64E-FPELIM-NEXT: sd a3, 296(a1) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: add a1, sp, a1 +; LP64E-FPELIM-NEXT: sd a2, 288(a1) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: addiw a1, a1, 320 +; LP64E-FPELIM-NEXT: add sp, sp, a1 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va_large_stack: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -2040 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 2040 +; LP64E-WITHFP-NEXT: sd ra, 1984(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 1976(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 1992 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: lui a0, 24414 +; LP64E-WITHFP-NEXT: addiw a0, a0, -1704 +; LP64E-WITHFP-NEXT: sub sp, sp, a0 +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: sub a1, s0, a1 +; LP64E-WITHFP-NEXT: sd a0, -288(a1) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 
24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: addiw a1, a1, -1704 +; LP64E-WITHFP-NEXT: add sp, sp, a1 +; LP64E-WITHFP-NEXT: ld ra, 1984(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 1976(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 2040 +; LP64E-WITHFP-NEXT: ret %large = alloca [ 100000000 x i8 ] %va = alloca ptr call void @llvm.va_start(ptr %va) diff --git a/llvm/test/MC/RISCV/option-invalid.s b/llvm/test/MC/RISCV/option-invalid.s --- a/llvm/test/MC/RISCV/option-invalid.s +++ b/llvm/test/MC/RISCV/option-invalid.s @@ -56,9 +56,6 @@ # CHECK: :[[#@LINE+1]]:12: warning: unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'arch', 'relax' or 'norelax' .option bar -# CHECK: :[[#@LINE+1]]:16: error: unknown extension feature -.option arch, -i - # CHECK: :[[#@LINE+1]]:12: error: .option pop with no .option push .option pop diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s --- a/llvm/test/MC/RISCV/target-abi-invalid.s +++ b/llvm/test/MC/RISCV/target-abi-invalid.s @@ -30,7 +30,7 @@ # RUN: | FileCheck -check-prefix=RV32E-LP64 %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-LP64F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s # RUN: llvm-mc -triple=riscv32 -mattr=+e -target-abi lp64e %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32E-LP64E %s @@ -42,6 +42,7 @@ # RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32E-LP64E: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) +# RV32EFD-LP64D: LLVM ERROR: ILP32E cannot be used with the D ISA extension # RUN: llvm-mc 
-triple=riscv32 -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32I-ILP32F %s @@ -69,15 +70,17 @@ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32D %s # RV32E-ILP32: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EF-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EFD-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32F: LLVM ERROR: ILP32E cannot be used with the D ISA extension # RV32EFD-ILP32D: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32D: LLVM ERROR: ILP32E cannot be used with the D ISA extension # RUN: llvm-mc -triple=riscv64 -mattr=+e -target-abi lp64 < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV64EF-LP64F %s