diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -121,6 +121,12 @@ } bool setABI(const std::string &Name) override { + if (Name == "ilp32e") { + ABI = Name; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S32"); + return true; + } + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { ABI = Name; return true; @@ -145,6 +151,12 @@ } bool setABI(const std::string &Name) override { + if (Name == "lp64e") { + ABI = Name; + resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n32:64-S64"); + return true; + } + if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") { ABI = Name; return true; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -148,7 +148,7 @@ else Builder.defineMacro("__riscv_float_abi_soft"); - if (ABIName == "ilp32e") + if (ABIName == "ilp32e" || ABIName == "lp64e") Builder.defineMacro("__riscv_abi_rve"); Builder.defineMacro("__riscv_arch_test"); @@ -204,6 +204,13 @@ if (VScale && VScale->first && VScale->first == VScale->second) Builder.defineMacro("__riscv_v_fixed_vlen", Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock)); + + if (ISAInfo->hasExtension("e")) { + if (Is64Bit) + Builder.defineMacro("__riscv_64e"); + else + Builder.defineMacro("__riscv_32e"); + } } static constexpr Builtin::Info BuiltinInfo[] = { @@ -322,6 +329,11 @@ if (ISAInfo->hasExtension("zfh") || ISAInfo->hasExtension("zhinx")) HasLegalHalfType = true; + if (ABI == "ilp32e" && ISAInfo->hasExtension("d")) { + Diags.Report(diag::err_invalid_feature_combination) + << "ILP32E must not be used with the D ISA extension"; + return false; + } return true; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -228,7 +228,8 @@ ABIFLen = 32; else if 
(ABIStr.endswith("d")) ABIFLen = 64; - return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen); + bool EABI = ABIStr.endswith("e"); + return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen, EABI); } case llvm::Triple::systemz: { diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -493,7 +493,8 @@ bool SoftFloatABI); std::unique_ptr<TargetCodeGenInfo> -createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen); +createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen, + bool EABI); std::unique_ptr<TargetCodeGenInfo> createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM); diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -26,8 +26,9 @@ // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target // with soft float ABI has FLen==0). unsigned FLen; - static const int NumArgGPRs = 8; - static const int NumArgFPRs = 8; + const int NumArgGPRs; + const int NumArgFPRs; + const bool EABI; bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, @@ -35,8 +36,10 @@ CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, + bool EABI) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen), NumArgGPRs(EABI ? 6 : 8), + NumArgFPRs(FLen != 0 ? 8 : 0), EABI(EABI) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. @@ -87,7 +90,7 @@ } int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? 
NumArgFPRs : 0; + int ArgFPRsLeft = NumArgFPRs; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; @@ -384,9 +387,12 @@ // Determine the number of GPRs needed to pass the current argument // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" // register pairs, so may consume 3 registers. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. int NeededArgGPRs = 1; if (!IsFixed && NeededAlign == 2 * XLen) - NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + NeededArgGPRs = 2 + (EABI && XLen == 32 ? 0 : (ArgGPRsLeft % 2)); else if (Size > XLen && Size <= 2 * XLen) NeededArgGPRs = 2; @@ -468,6 +474,13 @@ auto TInfo = getContext().getTypeInfoInChars(Ty); + // TODO: To be compatible with GCC's behaviors, we force arguments with + // 2×XLEN-bit alignment and size at most 2×XLEN bits like `long long`, + // `unsigned long long` and `double` to have 4-bytes alignment. This + // behavior may be changed when RV32E/ILP32E is ratified. + if (EABI && XLen == 32) + TInfo.Align = std::min(TInfo.Align, CharUnits::fromQuantity(4)); + // Arguments bigger than 2*Xlen bytes are passed indirectly. 
bool IsIndirect = TInfo.Width > 2 * SlotSize; @@ -487,8 +500,9 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, - unsigned FLen) - : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} + unsigned FLen, bool EABI) + : TargetCodeGenInfo( + std::make_unique<RISCVABIInfo>(CGT, XLen, FLen, EABI)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -514,6 +528,7 @@ std::unique_ptr<TargetCodeGenInfo> CodeGen::createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, - unsigned FLen) { - return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen); + unsigned FLen, bool EABI) { + return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen, + EABI); } diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -206,6 +206,7 @@ // rv32e -> ilp32e // rv32* -> ilp32 // rv64g | rv64*d -> lp64d + // rv64e -> lp64e // rv64* -> lp64 StringRef Arch = getRISCVArch(Args, Triple); @@ -283,6 +284,7 @@ // 3. 
Choose a default based on `-mabi=` // // ilp32e -> rv32e + // lp64e -> rv64e // ilp32 | ilp32f | ilp32d -> rv32imafdc // lp64 | lp64f | lp64d -> rv64imafdc if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { @@ -290,6 +292,8 @@ if (MABI.equals_insensitive("ilp32e")) return "rv32e"; + else if (MABI.starts_with_insensitive("lp64e")) + return "rv64e"; else if (MABI.starts_with_insensitive("ilp32")) return "rv32imafdc"; else if (MABI.starts_with_insensitive("lp64")) { diff --git a/clang/test/CodeGen/RISCV/riscv32-abi.c b/clang/test/CodeGen/RISCV/riscv32-abi.c --- a/clang/test/CodeGen/RISCV/riscv32-abi.c +++ b/clang/test/CodeGen/RISCV/riscv32-abi.c @@ -5,6 +5,8 @@ // RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32-ILP32F,ILP32F %s // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \ // RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32F-ILP32D,ILP32D %s +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -target-abi ilp32e %s -o - \ +// RUN: | FileCheck -check-prefixes=ILP32-ILP32F-ILP32D,ILP32-ILP32F,ILP32,ILP32E %s #include <stddef.h> #include <stdint.h> @@ -2064,4 +2066,5 @@ } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// ILP32E: {{.*}} // ILP32F: {{.*}} diff --git a/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv32-ilp32e-error.c @@ -0,0 +1,4 @@ +// RUN: not %clang_cc1 -triple riscv32 -target-feature +d -emit-llvm -target-abi ilp32e %s 2>&1 \ +// RUN: | FileCheck -check-prefix=ILP32E-WITH-FD %s + +// ILP32E-WITH-FD: error: invalid feature combination: ILP32E must not be used with the D ISA extension diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c --- a/clang/test/CodeGen/RISCV/riscv32-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c @@ -1,9 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32F // RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-feature +f -target-abi ilp32d -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32D +// RUN: %clang_cc1 -triple riscv32 -target-abi ilp32e -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-ILP32E #include <stddef.h> #include <stdint.h> @@ -102,24 +104,60 @@ // used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the // correct offsets are used. -// CHECK-LABEL: define dso_local double @f_va_2 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca double, align 8 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 -// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 -// CHECK-NEXT: ret double [[TMP2]] +// CHECK-ILP32F-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: ret double [[TMP2]] +// +// CHECK-ILP32D-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: ret double [[TMP2]] +// +// CHECK-ILP32E-LABEL: define dso_local double @f_va_2 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: ret double [[TMP1]] // double f_va_2(char *fmt, ...) { __builtin_va_list va; @@ -133,40 +171,106 @@ // Two "aligned" register pairs. -// CHECK-LABEL: define dso_local double @f_va_3 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[X:%.*]] = alloca double, align 8 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 -// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP1]], ptr [[V]], align 8 -// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 -// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 -// CHECK-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) -// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 -// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 -// CHECK-NEXT: store double [[TMP4]], ptr [[X]], align 8 -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 -// 
CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] -// CHECK-NEXT: ret double [[ADD]] +// CHECK-ILP32F-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32F-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) +// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 +// 
CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 +// CHECK-ILP32F-NEXT: store double [[TMP4]], ptr [[X]], align 8 +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK-ILP32F-NEXT: ret double [[ADD]] +// +// CHECK-ILP32D-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP1]], ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// 
CHECK-ILP32D-NEXT: store i32 [[TMP2]], ptr [[W]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 7 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP3]], i32 -8) +// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3_ALIGNED]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8 +// CHECK-ILP32D-NEXT: store double [[TMP4]], ptr [[X]], align 8 +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK-ILP32D-NEXT: ret double [[ADD]] +// +// CHECK-ILP32E-LABEL: define dso_local double @f_va_3 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: [[W:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: [[X:%.*]] = alloca double, align 8 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP0]], ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32E-NEXT: store i32 [[TMP1]], ptr [[W]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4 +// CHECK-ILP32E-NEXT: store double [[TMP2]], ptr [[X]], align 8 +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load double, ptr [[V]], align 8 +// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load double, ptr [[X]], align 8 +// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]] +// CHECK-ILP32E-NEXT: ret double [[ADD]] // double f_va_3(char *fmt, ...) 
{ __builtin_va_list va; @@ -180,93 +284,269 @@ return v + x; } -// CHECK-LABEL: define dso_local i32 @f_va_4 -// CHECK-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[LD:%.*]] = alloca fp128, align 16 -// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 -// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 -// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 -// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 -// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) -// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 -// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 -// CHECK-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 -// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) -// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 -// 
CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) -// CHECK-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 -// CHECK-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 -// CHECK-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) -// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 -// CHECK-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 -// CHECK-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] -// CHECK-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 -// CHECK-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 -// CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 -// CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] -// CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 -// CHECK-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 -// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] -// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 -// CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 -// CHECK-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 -// CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] -// CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 -// CHECK-NEXT: [[TMP10:%.*]] = load i8, 
ptr [[D]], align 1 -// CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 -// CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] -// CHECK-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 -// CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 -// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] -// CHECK-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 -// CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 -// CHECK-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] -// CHECK-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 -// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] -// CHECK-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 -// CHECK-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] -// CHECK-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 -// CHECK-NEXT: ret i32 [[TMP20]] +// CHECK-ILP32F-LABEL: define dso_local i32 @f_va_4 
+// CHECK-ILP32F-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32F-NEXT: entry: +// CHECK-ILP32F-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32F-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32F-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32F-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32F-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32F-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32F-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32F-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32F-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 +// CHECK-ILP32F-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32F-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32F-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// 
CHECK-ILP32F-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32F-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32F-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32F-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32F-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32F-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32F-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32F-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32F-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32F-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32F-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32F-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32F-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32F-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32F-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32F-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32F-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32F-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32F-NEXT: [[TMP9:%.*]] = load i8, ptr 
[[C]], align 1 +// CHECK-ILP32F-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 +// CHECK-ILP32F-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32F-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32F-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32F-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32F-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32F-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32F-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32F-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32F-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32F-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32F-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32F-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32F-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32F-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32F-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32F-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32F-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32F-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32F-NEXT: [[ADD27:%.*]] = add nsw i32 
[[ADD25]], [[TMP18]] +// CHECK-ILP32F-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32F-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32F-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32F-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32F-NEXT: ret i32 [[TMP20]] +// +// CHECK-ILP32D-LABEL: define dso_local i32 @f_va_4 +// CHECK-ILP32D-SAME: (ptr noundef [[FMT:%.*]], ...) #[[ATTR0]] { +// CHECK-ILP32D-NEXT: entry: +// CHECK-ILP32D-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32D-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32D-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32D-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32D-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32D-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32D-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32D-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32D-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 16 +// 
CHECK-ILP32D-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32D-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32D-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32D-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32D-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32D-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32D-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32D-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32D-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32D-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32D-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32D-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32D-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32D-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32D-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32D-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32D-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32D-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32D-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32D-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32D-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 +// CHECK-ILP32D-NEXT: [[CONV14:%.*]] = zext i8 [[TMP9]] to i32 +// CHECK-ILP32D-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32D-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32D-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32D-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32D-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32D-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32D-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32D-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32D-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32D-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32D-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32D-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32D-NEXT: [[ADD23:%.*]] 
= add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32D-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32D-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32D-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32D-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32D-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32D-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] +// CHECK-ILP32D-NEXT: [[D28:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32D-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32D-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32D-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32D-NEXT: ret i32 [[TMP20]] +// +// CHECK-ILP32E-LABEL: define dso_local i32 @f_va_4 +// CHECK-ILP32E-SAME: (ptr noundef [[FMT:%.*]], ...) 
#[[ATTR0]] { +// CHECK-ILP32E-NEXT: entry: +// CHECK-ILP32E-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[VA:%.*]] = alloca ptr, align 4 +// CHECK-ILP32E-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: [[LD:%.*]] = alloca fp128, align 16 +// CHECK-ILP32E-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1 +// CHECK-ILP32E-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4 +// CHECK-ILP32E-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4 +// CHECK-ILP32E-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-ILP32E-NEXT: store ptr [[FMT]], ptr [[FMT_ADDR]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +// CHECK-ILP32E-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4 +// CHECK-ILP32E-NEXT: [[TMP2:%.*]] = load fp128, ptr [[TMP1]], align 4 +// CHECK-ILP32E-NEXT: store fp128 [[TMP2]], ptr [[LD]], align 16 +// CHECK-ILP32E-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS]], ptr align 4 [[ARGP_CUR3]], i32 4, i1 false) +// CHECK-ILP32E-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds 
i8, ptr [[ARGP_CUR5]], i32 8 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT6]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[SS]], ptr align 4 [[ARGP_CUR5]], i32 8, i1 false) +// CHECK-ILP32E-NEXT: [[ARGP_CUR7:%.*]] = load ptr, ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR7]], i32 4 +// CHECK-ILP32E-NEXT: store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4 +// CHECK-ILP32E-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4 +// CHECK-ILP32E-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false) +// CHECK-ILP32E-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-ILP32E-NEXT: [[TMP4:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-ILP32E-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128 +// CHECK-ILP32E-NEXT: [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16 +// CHECK-ILP32E-NEXT: [[ADD:%.*]] = fadd fp128 [[CONV]], [[TMP5]] +// CHECK-ILP32E-NEXT: [[CONV9:%.*]] = fptosi fp128 [[ADD]] to i32 +// CHECK-ILP32E-NEXT: store i32 [[CONV9]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP6:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK-ILP32E-NEXT: [[CONV10:%.*]] = zext i8 [[TMP7]] to i32 +// CHECK-ILP32E-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP6]], [[CONV10]] +// CHECK-ILP32E-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP8:%.*]] = load i8, ptr [[B]], align 1 +// CHECK-ILP32E-NEXT: [[CONV12:%.*]] = zext i8 [[TMP8]] to i32 +// CHECK-ILP32E-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] +// CHECK-ILP32E-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 2 +// CHECK-ILP32E-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1 +// CHECK-ILP32E-NEXT: [[CONV14:%.*]] = zext i8 
[[TMP9]] to i32 +// CHECK-ILP32E-NEXT: [[ADD15:%.*]] = add nsw i32 [[ADD13]], [[CONV14]] +// CHECK-ILP32E-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 3 +// CHECK-ILP32E-NEXT: [[TMP10:%.*]] = load i8, ptr [[D]], align 1 +// CHECK-ILP32E-NEXT: [[CONV16:%.*]] = zext i8 [[TMP10]] to i32 +// CHECK-ILP32E-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CONV16]] +// CHECK-ILP32E-NEXT: store i32 [[ADD17]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP11:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP12:%.*]] = load i32, ptr [[A18]], align 4 +// CHECK-ILP32E-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-ILP32E-NEXT: [[B20:%.*]] = getelementptr inbounds [[STRUCT_SMALL]], ptr [[SS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B20]], align 4 +// CHECK-ILP32E-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +// CHECK-ILP32E-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD19]], [[TMP14]] +// CHECK-ILP32E-NEXT: store i32 [[ADD21]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP15:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 0 +// CHECK-ILP32E-NEXT: [[TMP16:%.*]] = load i32, ptr [[A22]], align 4 +// CHECK-ILP32E-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-ILP32E-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 1 +// CHECK-ILP32E-NEXT: [[TMP17:%.*]] = load i32, ptr [[B24]], align 4 +// CHECK-ILP32E-NEXT: [[ADD25:%.*]] = add nsw i32 [[ADD23]], [[TMP17]] +// CHECK-ILP32E-NEXT: [[C26:%.*]] = getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 2 +// CHECK-ILP32E-NEXT: [[TMP18:%.*]] = load i32, ptr [[C26]], align 4 +// CHECK-ILP32E-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD25]], [[TMP18]] +// CHECK-ILP32E-NEXT: [[D28:%.*]] = 
getelementptr inbounds [[STRUCT_LARGE]], ptr [[LS]], i32 0, i32 3 +// CHECK-ILP32E-NEXT: [[TMP19:%.*]] = load i32, ptr [[D28]], align 4 +// CHECK-ILP32E-NEXT: [[ADD29:%.*]] = add nsw i32 [[ADD27]], [[TMP19]] +// CHECK-ILP32E-NEXT: store i32 [[ADD29]], ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: [[TMP20:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-ILP32E-NEXT: ret i32 [[TMP20]] // int f_va_4(char *fmt, ...) { __builtin_va_list va; diff --git a/clang/test/CodeGen/RISCV/riscv64-abi.c b/clang/test/CodeGen/RISCV/riscv64-abi.c --- a/clang/test/CodeGen/RISCV/riscv64-abi.c +++ b/clang/test/CodeGen/RISCV/riscv64-abi.c @@ -5,6 +5,8 @@ // RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64-LP64F,LP64F %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - \ // RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64F-LP64D,LP64D %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-abi lp64e %s -o - \ +// RUN: | FileCheck -check-prefixes=LP64-LP64F-LP64D,LP64-LP64F,LP64,LP64E %s #include #include @@ -2046,3 +2048,5 @@ return (union float16_u){1.0}; } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// LP64E: {{.*}} diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c --- a/clang/test/CodeGen/RISCV/riscv64-vararg.c +++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c @@ -1,9 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-abi lp64f -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LP64F // RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-feature +f -target-abi lp64d -emit-llvm %s -o - \ -// RUN: | FileCheck %s +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LP64D +// RUN: %clang_cc1 -triple riscv64 -target-abi lp64e -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LP64E #include #include @@ -294,3 +296,7 @@ return ret; } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK-LP64D: {{.*}} +// CHECK-LP64E: {{.*}} +// CHECK-LP64F: {{.*}} diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -3,6 +3,7 @@ // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64i -x c -E -dM %s \ // RUN: -o - | FileCheck %s +// CHECK-NOT: __riscv_32e // CHECK-NOT: __riscv_div {{.*$}} // CHECK-NOT: __riscv_m {{.*$}} // CHECK-NOT: __riscv_mul {{.*$}} @@ -79,6 +80,21 @@ // RUN: -o - | FileCheck %s // CHECK: __riscv_i 2001000{{$}} +// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV32E %s +// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefixes=CHECK-E-EXT,CHECK-RV64E %s +// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32i -mabi=ilp32e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ILP32E %s +// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64i -mabi=lp64e -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-LP64E %s +// CHECK-RV32E: __riscv_32e 1 +// CHECK-RV64E: __riscv_64e 1 +// CHECK-E-EXT: __riscv_abi_rve 1 +// CHECK-E-EXT: __riscv_e 2000000{{$}} +// CHECK-ILP32E: __riscv_abi_rve 1 +// CHECK-LP64E: __riscv_abi_rve 1 + // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32im -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64im -x c -E -dM %s \ diff --git a/llvm/include/llvm/Support/RISCVAttributes.h b/llvm/include/llvm/Support/RISCVAttributes.h --- a/llvm/include/llvm/Support/RISCVAttributes.h +++ b/llvm/include/llvm/Support/RISCVAttributes.h @@ -34,7 +34,7 @@ PRIV_SPEC_REVISION = 12, }; -enum StackAlign { ALIGN_4 = 4, ALIGN_16 = 16 }; +enum StackAlign { 
ALIGN_4 = 4, ALIGN_8 = 8, ALIGN_16 = 16 }; enum { NOT_ALLOWED = 0, ALLOWED = 1 }; diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -934,7 +934,6 @@ // Additional dependency checks. // TODO: The 'q' extension requires rv64. - // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'. return Error::success(); } @@ -1211,16 +1210,16 @@ StringRef RISCVISAInfo::computeDefaultABI() const { if (XLen == 32) { - if (hasExtension("d")) - return "ilp32d"; if (hasExtension("e")) return "ilp32e"; + if (hasExtension("d")) + return "ilp32d"; return "ilp32"; } else if (XLen == 64) { - if (hasExtension("d")) - return "lp64d"; if (hasExtension("e")) return "lp64e"; + if (hasExtension("d")) + return "lp64d"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -68,6 +68,11 @@ TargetABI = ABI_Unknown; } + if ((TargetABI == RISCVABI::ABI::ABI_ILP32E || + (TargetABI == ABI_Unknown && IsRVE && !IsRV64)) && + FeatureBits[RISCV::FeatureStdExtD]) + report_fatal_error("ILP32E must not be used with the D ISA extension"); + if (TargetABI != ABI_Unknown) return TargetABI; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -50,11 +50,14 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign) { - if (STI.hasFeature(RISCV::FeatureRVE)) - report_fatal_error("Codegen not yet implemented for RVE"); - - if (EmitStackAlign) - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + if 
(EmitStackAlign) { + if (STI.hasFeature(RISCV::FeatureRVE)) + emitAttribute(RISCVAttrs::STACK_ALIGN, STI.hasFeature(RISCV::Feature32Bit) + ? RISCVAttrs::ALIGN_4 + : RISCVAttrs::ALIGN_8); + else + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + } auto ParseResult = RISCVFeatures::parseFeatureBits( STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits()); diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -13,8 +13,10 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). +def CSR_ILP32E_LP64E : CalleeSavedRegs<(add X1, X3, X4, X8, X9)>; + def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add CSR_ILP32E_LP64E, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -63,3 +65,29 @@ (sequence "F%u_D", 28, 31), (sequence "F%u_D", 8, 9), (sequence "F%u_D", 18, 27))>; + +// Same as CSR_Interrupt, but excluding X16-X31. +def CSR_Interrupt_RVE : CalleeSavedRegs<(add X1, + (sequence "X%u", 3, 9), + (sequence "X%u", 10, 11), + (sequence "X%u", 12, 15))>; + +// Same as CSR_XLEN_F32_Interrupt, but excluding X16-X31. +def CSR_XLEN_F32_Interrupt_RVE: CalleeSavedRegs<(add + CSR_Interrupt_RVE, + (sequence "F%u_F", 0, 7), + (sequence "F%u_F", 10, 11), + (sequence "F%u_F", 12, 17), + (sequence "F%u_F", 28, 31), + (sequence "F%u_F", 8, 9), + (sequence "F%u_F", 18, 27))>; + +// Same as CSR_XLEN_F64_Interrupt, but excluding X16-X31. 
+def CSR_XLEN_F64_Interrupt_RVE: CalleeSavedRegs<(add + CSR_Interrupt_RVE, + (sequence "F%u_D", 0, 7), + (sequence "F%u_D", 10, 11), + (sequence "F%u_D", 12, 17), + (sequence "F%u_D", 28, 31), + (sequence "F%u_D", 8, 9), + (sequence "F%u_D", 18, 27))>; diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -17,6 +17,13 @@ AssemblerPredicate<(all_of FeatureStdExtZicsr), "'Zicsr' (CSRs)">; +def FeatureStdExtI + : SubtargetFeature<"i", "HasStdExtI", "true", + "'I' (Base Integer Instruction Set)">; +def HasStdExtI : Predicate<"Subtarget->hasStdExtI()">, + AssemblerPredicate<(all_of FeatureStdExtI), + "'I' (Base Integer Instruction Set)">; + def FeatureStdExtM : SubtargetFeature<"m", "HasStdExtM", "true", "'M' (Integer Multiplication and Division)">; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -21,12 +21,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: - explicit RISCVFrameLowering(const RISCVSubtarget &STI) - : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/Align(16), - /*LocalAreaOffset=*/0, - /*TransientStackAlignment=*/Align(16)), - STI(STI) {} + explicit RISCVFrameLowering(const RISCVSubtarget &STI); void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -27,6 +27,21 @@ using namespace llvm; +static Align getABIStackAlignment(RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E) + return Align(4); + if (ABI == RISCVABI::ABI_LP64E) + return 
Align(8); + return Align(16); +} + +RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + getABIStackAlignment(STI.getTargetABI()), + /*LocalAreaOffset=*/0, + /*TransientStackAlignment=*/Align(16)), + STI(STI) {} + static const Register AllPopRegs[] = { RISCV::X1, RISCV::X8, RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, RISCV::X24, @@ -513,7 +528,8 @@ if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) { // Calculate the size of the frame managed by the libcall. The libcalls are // implemented such that the stack will always be 16 byte aligned. - unsigned LibCallFrameSize = alignTo((STI.getXLen() / 8) * LibCallRegs, 16); + unsigned LibCallFrameSize = + alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); RVFI->setLibCallStackSize(LibCallFrameSize); } @@ -984,6 +1000,7 @@ // unconditionally save all Caller-saved registers and // all FP registers, regardless whether they are used. MachineFrameInfo &MFI = MF.getFrameInfo(); + auto &Subtarget = MF.getSubtarget(); if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) { @@ -995,9 +1012,10 @@ }; for (unsigned i = 0; CSRegs[i]; ++i) - SavedRegs.set(CSRegs[i]); + if (CSRegs[i] < RISCV::X16 || !Subtarget.isRVE()) + SavedRegs.set(CSRegs[i]); - if (MF.getSubtarget().hasStdExtF()) { + if (Subtarget.hasStdExtF()) { // If interrupt is enabled, this list contains all FP registers. 
const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -77,9 +77,6 @@ const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRVE()) - report_fatal_error("Codegen not yet implemented for RVE"); - RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); @@ -101,6 +98,8 @@ default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -14193,10 +14192,15 @@ // register-size fields in the same situations they would be for fixed // arguments. -static const MCPhysReg ArgGPRs[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, - RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 -}; +// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except +// the ILP32E ABI. +static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; +// The GPRs used for passing arguments in the ILP32E/ILP64E ABI. +static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; +// The FPRs used for passing arguments in the ILP32F and LP64F ABIs. static const MCPhysReg ArgFPR16s[] = { RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H @@ -14205,6 +14209,7 @@ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; +// The FPRs used for passing arguments in the ILP32D and LP64D ABIs. 
static const MCPhysReg ArgFPR64s[] = { RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D @@ -14221,21 +14226,62 @@ RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; +// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used +// for save-restore libcall, so we don't use them. +static const MCPhysReg FastCCIGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + +// The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. +static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X7}; + +static ArrayRef getCallingConvArgGPRs(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(ArgEGPRs); + + return ArrayRef(ArgIGPRs); +} + +static ArrayRef getFastCCGPRs(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(FastCCEGPRs); + + return ArrayRef(FastCCIGPRs); +} + +static Register getCallingConvLastArgGPR(const RISCVABI::ABI ABI) { + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return RISCV::X15; + + return RISCV::X17; +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, - ISD::ArgFlagsTy ArgFlags2) { + ISD::ArgFlagsTy ArgFlags2, bool EABI) { unsigned XLenInBytes = XLen / 8; + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget(); + ArrayRef ArgGPRs = getCallingConvArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. 
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); } else { // Both halves must be passed on the stack, with proper alignment. + // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte + // alignment. This behavior may be changed when RV32E/ILP32E is ratified. Align StackAlign = - std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); + EABI && XLen == 32 + ? Align(XLenInBytes) + : std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); State.addLoc( CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), State.AllocateStack(XLenInBytes, StackAlign), @@ -14316,7 +14362,9 @@ default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64E: break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: @@ -14347,6 +14395,8 @@ LocInfo = CCValAssign::BCvt; } + ArrayRef ArgGPRs = getCallingConvArgGPRs(ABI); + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -14354,9 +14404,13 @@ // legalisation or not. The argument will not be passed by registers if the // original type is larger than 2*XLEN, so the register alignment rule does // not apply. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && - DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { + DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && + ABI != RISCVABI::ABI_ILP32E) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. 
if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) @@ -14423,8 +14477,9 @@ ISD::ArgFlagsTy AF = PendingArgFlags[0]; PendingLocs.clear(); PendingArgFlags.clear(); - return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, - ArgFlags); + return CC_RISCVAssign2XLen( + XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, + ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); } // Allocate to a register if possible, or else a stack slot. @@ -14702,6 +14757,7 @@ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const RISCVSubtarget &STI = MF.getSubtarget(); if (VA.isMemLoc()) { // f64 is passed on the stack. @@ -14718,7 +14774,7 @@ RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; - if (VA.getLocReg() == RISCV::X17) { + if (VA.getLocReg() == getCallingConvLastArgGPR(STI.getTargetABI())) { // Second half of f64 is passed on the stack. int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); @@ -14742,15 +14798,8 @@ bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional FirstMaskArgument) { - - // X5 and X6 might be used for save-restore libcall. - static const MCPhysReg GPRList[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, - RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, - RISCV::X29, RISCV::X30, RISCV::X31}; - if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -14814,7 +14863,7 @@ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. 
- if (unsigned GPRReg = State.AllocateReg(GPRList)) { + if (unsigned GPRReg = State.AllocateReg(getFastCCGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -14901,6 +14950,8 @@ case CallingConv::Fast: break; case CallingConv::GHC: + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); if (!Subtarget.hasStdExtF() || !Subtarget.hasStdExtD()) report_fatal_error( "GHC calling convention requires the F and D instruction set extensions"); @@ -14923,6 +14974,7 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); unsigned XLenInBytes = Subtarget.getXLen() / 8; + RISCVABI::ABI ABI = Subtarget.getTargetABI(); // Used with vargs to acumulate store chains. std::vector OutChains; @@ -14981,7 +15033,7 @@ MF.getInfo()->setIsVectorCall(); if (IsVarArg) { - ArrayRef ArgRegs = ArrayRef(ArgGPRs); + ArrayRef ArgRegs = getCallingConvArgGPRs(ABI); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -15135,9 +15187,11 @@ SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - if (CallConv == CallingConv::GHC) + if (CallConv == CallingConv::GHC) { + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); - else + } else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); @@ -15202,7 +15256,7 @@ Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); - if (RegLo == RISCV::X17) { + if (RegLo == getCallingConvLastArgGPR(Subtarget.getTargetABI())) { // Second half of f64 is passed on the stack. // Work out the address of the stack slot. 
if (!StackPtr.getNode()) @@ -15403,9 +15457,9 @@ Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); + assert(VA.getLocReg() == RISCV::X10 && "Unexpected reg assignment"); SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + DAG.getCopyFromReg(Chain, DL, RISCV::X11, MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -62,13 +62,18 @@ if (Subtarget.hasStdExtD()) return CSR_XLEN_F64_Interrupt_SaveList; if (Subtarget.hasStdExtF()) - return CSR_XLEN_F32_Interrupt_SaveList; - return CSR_Interrupt_SaveList; + return Subtarget.isRVE() ? CSR_XLEN_F32_Interrupt_RVE_SaveList + : CSR_XLEN_F32_Interrupt_SaveList; + return Subtarget.isRVE() ? CSR_Interrupt_RVE_SaveList + : CSR_Interrupt_SaveList; } switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: + return CSR_ILP32E_LP64E_SaveList; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_SaveList; @@ -82,12 +87,13 @@ } BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + const RISCVSubtarget &STI = MF.getSubtarget(); const RISCVFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); // Mark any registers requested to be reserved as such for (size_t Reg = 0; Reg < getNumRegs(); Reg++) { - if (MF.getSubtarget().isRegisterReservedByUser(Reg)) + if (STI.isRegisterReservedByUser(Reg)) markSuperRegs(Reserved, Reg); } @@ -103,6 +109,11 @@ if (TFI->hasBP(MF)) markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp + // There are only 16 GPRs for RVE. 
+ if (STI.isRVE()) + for (size_t Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + markSuperRegs(Reserved, Reg); + // V registers for code generation. We handle them manually. markSuperRegs(Reserved, RISCV::VL); markSuperRegs(Reserved, RISCV::VTYPE); @@ -634,6 +645,9 @@ switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: + return CSR_ILP32E_LP64E_RegMask; case RISCVABI::ABI_ILP32: case RISCVABI::ABI_LP64: return CSR_ILP32_LP64_RegMask; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -92,10 +92,20 @@ initializeRISCVPushPopOptPass(*PR); } -static StringRef computeDataLayout(const Triple &TT) { - if (TT.isArch64Bit()) +static StringRef computeDataLayout(const Triple &TT, + const TargetOptions &Options) { + StringRef ABIName = Options.MCOptions.getABIName(); + if (TT.isArch64Bit()) { + if (ABIName == "lp64e") + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64"; + return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + } assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + + if (ABIName == "ilp32e") + return "e-m:e-p:32:32-i64:64-n32-S32"; + return "e-m:e-p:32:32-i64:64-n32-S128"; } @@ -110,7 +120,7 @@ std::optional RM, std::optional CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + : LLVMTargetMachine(T, computeDataLayout(TT, Options), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()) { diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=ILP32E ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=LP64 +; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=LP64E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32F ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ @@ -14,8 +18,8 @@ @var = global [32 x float] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. -; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. +; All floating point registers are temporaries for the ilp32, ilp32e, lp64e and lp64 +; ABIs. fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns ; something appropriate. 
@@ -91,6 +95,76 @@ ; ILP32-NEXT: fsw fa5, %lo(var)(a0) ; ILP32-NEXT: ret ; +; ILP32E-LABEL: callee: +; ILP32E: # %bb.0: +; ILP32E-NEXT: lui a0, %hi(var) +; ILP32E-NEXT: flw fa5, %lo(var)(a0) +; ILP32E-NEXT: flw fa4, %lo(var+4)(a0) +; ILP32E-NEXT: flw fa3, %lo(var+8)(a0) +; ILP32E-NEXT: flw fa2, %lo(var+12)(a0) +; ILP32E-NEXT: addi a1, a0, %lo(var) +; ILP32E-NEXT: flw fa1, 16(a1) +; ILP32E-NEXT: flw fa0, 20(a1) +; ILP32E-NEXT: flw ft0, 24(a1) +; ILP32E-NEXT: flw ft1, 28(a1) +; ILP32E-NEXT: flw ft2, 32(a1) +; ILP32E-NEXT: flw ft3, 36(a1) +; ILP32E-NEXT: flw ft4, 40(a1) +; ILP32E-NEXT: flw ft5, 44(a1) +; ILP32E-NEXT: flw ft6, 48(a1) +; ILP32E-NEXT: flw ft7, 52(a1) +; ILP32E-NEXT: flw fa6, 56(a1) +; ILP32E-NEXT: flw fa7, 60(a1) +; ILP32E-NEXT: flw ft8, 64(a1) +; ILP32E-NEXT: flw ft9, 68(a1) +; ILP32E-NEXT: flw ft10, 72(a1) +; ILP32E-NEXT: flw ft11, 76(a1) +; ILP32E-NEXT: flw fs0, 80(a1) +; ILP32E-NEXT: flw fs1, 84(a1) +; ILP32E-NEXT: flw fs2, 88(a1) +; ILP32E-NEXT: flw fs3, 92(a1) +; ILP32E-NEXT: flw fs4, 96(a1) +; ILP32E-NEXT: flw fs5, 100(a1) +; ILP32E-NEXT: flw fs6, 104(a1) +; ILP32E-NEXT: flw fs7, 108(a1) +; ILP32E-NEXT: flw fs8, 124(a1) +; ILP32E-NEXT: flw fs9, 120(a1) +; ILP32E-NEXT: flw fs10, 116(a1) +; ILP32E-NEXT: flw fs11, 112(a1) +; ILP32E-NEXT: fsw fs8, 124(a1) +; ILP32E-NEXT: fsw fs9, 120(a1) +; ILP32E-NEXT: fsw fs10, 116(a1) +; ILP32E-NEXT: fsw fs11, 112(a1) +; ILP32E-NEXT: fsw fs7, 108(a1) +; ILP32E-NEXT: fsw fs6, 104(a1) +; ILP32E-NEXT: fsw fs5, 100(a1) +; ILP32E-NEXT: fsw fs4, 96(a1) +; ILP32E-NEXT: fsw fs3, 92(a1) +; ILP32E-NEXT: fsw fs2, 88(a1) +; ILP32E-NEXT: fsw fs1, 84(a1) +; ILP32E-NEXT: fsw fs0, 80(a1) +; ILP32E-NEXT: fsw ft11, 76(a1) +; ILP32E-NEXT: fsw ft10, 72(a1) +; ILP32E-NEXT: fsw ft9, 68(a1) +; ILP32E-NEXT: fsw ft8, 64(a1) +; ILP32E-NEXT: fsw fa7, 60(a1) +; ILP32E-NEXT: fsw fa6, 56(a1) +; ILP32E-NEXT: fsw ft7, 52(a1) +; ILP32E-NEXT: fsw ft6, 48(a1) +; ILP32E-NEXT: fsw ft5, 44(a1) +; ILP32E-NEXT: fsw ft4, 40(a1) +; ILP32E-NEXT: fsw ft3, 
36(a1) +; ILP32E-NEXT: fsw ft2, 32(a1) +; ILP32E-NEXT: fsw ft1, 28(a1) +; ILP32E-NEXT: fsw ft0, 24(a1) +; ILP32E-NEXT: fsw fa0, 20(a1) +; ILP32E-NEXT: fsw fa1, 16(a1) +; ILP32E-NEXT: fsw fa2, %lo(var+12)(a0) +; ILP32E-NEXT: fsw fa3, %lo(var+8)(a0) +; ILP32E-NEXT: fsw fa4, %lo(var+4)(a0) +; ILP32E-NEXT: fsw fa5, %lo(var)(a0) +; ILP32E-NEXT: ret +; ; LP64-LABEL: callee: ; LP64: # %bb.0: ; LP64-NEXT: lui a0, %hi(var) @@ -161,6 +235,76 @@ ; LP64-NEXT: fsw fa5, %lo(var)(a0) ; LP64-NEXT: ret ; +; LP64E-LABEL: callee: +; LP64E: # %bb.0: +; LP64E-NEXT: lui a0, %hi(var) +; LP64E-NEXT: flw fa5, %lo(var)(a0) +; LP64E-NEXT: flw fa4, %lo(var+4)(a0) +; LP64E-NEXT: flw fa3, %lo(var+8)(a0) +; LP64E-NEXT: flw fa2, %lo(var+12)(a0) +; LP64E-NEXT: addi a1, a0, %lo(var) +; LP64E-NEXT: flw fa1, 16(a1) +; LP64E-NEXT: flw fa0, 20(a1) +; LP64E-NEXT: flw ft0, 24(a1) +; LP64E-NEXT: flw ft1, 28(a1) +; LP64E-NEXT: flw ft2, 32(a1) +; LP64E-NEXT: flw ft3, 36(a1) +; LP64E-NEXT: flw ft4, 40(a1) +; LP64E-NEXT: flw ft5, 44(a1) +; LP64E-NEXT: flw ft6, 48(a1) +; LP64E-NEXT: flw ft7, 52(a1) +; LP64E-NEXT: flw fa6, 56(a1) +; LP64E-NEXT: flw fa7, 60(a1) +; LP64E-NEXT: flw ft8, 64(a1) +; LP64E-NEXT: flw ft9, 68(a1) +; LP64E-NEXT: flw ft10, 72(a1) +; LP64E-NEXT: flw ft11, 76(a1) +; LP64E-NEXT: flw fs0, 80(a1) +; LP64E-NEXT: flw fs1, 84(a1) +; LP64E-NEXT: flw fs2, 88(a1) +; LP64E-NEXT: flw fs3, 92(a1) +; LP64E-NEXT: flw fs4, 96(a1) +; LP64E-NEXT: flw fs5, 100(a1) +; LP64E-NEXT: flw fs6, 104(a1) +; LP64E-NEXT: flw fs7, 108(a1) +; LP64E-NEXT: flw fs8, 124(a1) +; LP64E-NEXT: flw fs9, 120(a1) +; LP64E-NEXT: flw fs10, 116(a1) +; LP64E-NEXT: flw fs11, 112(a1) +; LP64E-NEXT: fsw fs8, 124(a1) +; LP64E-NEXT: fsw fs9, 120(a1) +; LP64E-NEXT: fsw fs10, 116(a1) +; LP64E-NEXT: fsw fs11, 112(a1) +; LP64E-NEXT: fsw fs7, 108(a1) +; LP64E-NEXT: fsw fs6, 104(a1) +; LP64E-NEXT: fsw fs5, 100(a1) +; LP64E-NEXT: fsw fs4, 96(a1) +; LP64E-NEXT: fsw fs3, 92(a1) +; LP64E-NEXT: fsw fs2, 88(a1) +; LP64E-NEXT: fsw fs1, 84(a1) +; 
LP64E-NEXT: fsw fs0, 80(a1) +; LP64E-NEXT: fsw ft11, 76(a1) +; LP64E-NEXT: fsw ft10, 72(a1) +; LP64E-NEXT: fsw ft9, 68(a1) +; LP64E-NEXT: fsw ft8, 64(a1) +; LP64E-NEXT: fsw fa7, 60(a1) +; LP64E-NEXT: fsw fa6, 56(a1) +; LP64E-NEXT: fsw ft7, 52(a1) +; LP64E-NEXT: fsw ft6, 48(a1) +; LP64E-NEXT: fsw ft5, 44(a1) +; LP64E-NEXT: fsw ft4, 40(a1) +; LP64E-NEXT: fsw ft3, 36(a1) +; LP64E-NEXT: fsw ft2, 32(a1) +; LP64E-NEXT: fsw ft1, 28(a1) +; LP64E-NEXT: fsw ft0, 24(a1) +; LP64E-NEXT: fsw fa0, 20(a1) +; LP64E-NEXT: fsw fa1, 16(a1) +; LP64E-NEXT: fsw fa2, %lo(var+12)(a0) +; LP64E-NEXT: fsw fa3, %lo(var+8)(a0) +; LP64E-NEXT: fsw fa4, %lo(var+4)(a0) +; LP64E-NEXT: fsw fa5, %lo(var)(a0) +; LP64E-NEXT: ret +; ; ILP32F-LABEL: callee: ; ILP32F: # %bb.0: ; ILP32F-NEXT: addi sp, sp, -48 @@ -700,6 +844,149 @@ ; ILP32-NEXT: addi sp, sp, 144 ; ILP32-NEXT: ret ; +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: +; ILP32E-NEXT: addi sp, sp, -140 +; ILP32E-NEXT: sw ra, 136(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 132(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s1, 128(sp) # 4-byte Folded Spill +; ILP32E-NEXT: lui s0, %hi(var) +; ILP32E-NEXT: flw fa5, %lo(var)(s0) +; ILP32E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+4)(s0) +; ILP32E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+8)(s0) +; ILP32E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, %lo(var+12)(s0) +; ILP32E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill +; ILP32E-NEXT: addi s1, s0, %lo(var) +; ILP32E-NEXT: flw fa5, 16(s1) +; ILP32E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 20(s1) +; ILP32E-NEXT: fsw fa5, 104(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 24(s1) +; ILP32E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 28(s1) +; ILP32E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 32(s1) +; ILP32E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 
36(s1) +; ILP32E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 40(s1) +; ILP32E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 44(s1) +; ILP32E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 48(s1) +; ILP32E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 52(s1) +; ILP32E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 56(s1) +; ILP32E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 60(s1) +; ILP32E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 64(s1) +; ILP32E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 68(s1) +; ILP32E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 72(s1) +; ILP32E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 76(s1) +; ILP32E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 80(s1) +; ILP32E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 84(s1) +; ILP32E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 88(s1) +; ILP32E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 92(s1) +; ILP32E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 96(s1) +; ILP32E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 100(s1) +; ILP32E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 104(s1) +; ILP32E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 108(s1) +; ILP32E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 112(s1) +; ILP32E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 116(s1) +; ILP32E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 120(s1) +; ILP32E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: flw fa5, 124(s1) +; ILP32E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: call callee@plt +; 
ILP32E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 124(s1) +; ILP32E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 120(s1) +; ILP32E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 116(s1) +; ILP32E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 112(s1) +; ILP32E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 108(s1) +; ILP32E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 104(s1) +; ILP32E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 100(s1) +; ILP32E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 96(s1) +; ILP32E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 92(s1) +; ILP32E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 88(s1) +; ILP32E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 84(s1) +; ILP32E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 80(s1) +; ILP32E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 76(s1) +; ILP32E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 72(s1) +; ILP32E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 68(s1) +; ILP32E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 64(s1) +; ILP32E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 60(s1) +; ILP32E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 56(s1) +; ILP32E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 52(s1) +; ILP32E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 48(s1) +; ILP32E-NEXT: flw fa5, 80(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 44(s1) +; ILP32E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 40(s1) +; ILP32E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 
36(s1) +; ILP32E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 32(s1) +; ILP32E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 28(s1) +; ILP32E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 24(s1) +; ILP32E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 20(s1) +; ILP32E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, 16(s1) +; ILP32E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+12)(s0) +; ILP32E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+8)(s0) +; ILP32E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var+4)(s0) +; ILP32E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload +; ILP32E-NEXT: fsw fa5, %lo(var)(s0) +; ILP32E-NEXT: lw ra, 136(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 132(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s1, 128(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 140 +; ILP32E-NEXT: ret +; ; LP64-LABEL: caller: ; LP64: # %bb.0: ; LP64-NEXT: addi sp, sp, -160 @@ -843,6 +1130,149 @@ ; LP64-NEXT: addi sp, sp, 160 ; LP64-NEXT: ret ; +; LP64E-LABEL: caller: +; LP64E: # %bb.0: +; LP64E-NEXT: addi sp, sp, -152 +; LP64E-NEXT: sd ra, 144(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s0, 136(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s1, 128(sp) # 8-byte Folded Spill +; LP64E-NEXT: lui s0, %hi(var) +; LP64E-NEXT: flw fa5, %lo(var)(s0) +; LP64E-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+4)(s0) +; LP64E-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+8)(s0) +; LP64E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, %lo(var+12)(s0) +; LP64E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill +; LP64E-NEXT: addi s1, s0, %lo(var) +; LP64E-NEXT: flw fa5, 16(s1) +; LP64E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 20(s1) +; LP64E-NEXT: fsw fa5, 
104(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 24(s1) +; LP64E-NEXT: fsw fa5, 100(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 28(s1) +; LP64E-NEXT: fsw fa5, 96(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 32(s1) +; LP64E-NEXT: fsw fa5, 92(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 36(s1) +; LP64E-NEXT: fsw fa5, 88(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 40(s1) +; LP64E-NEXT: fsw fa5, 84(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 44(s1) +; LP64E-NEXT: fsw fa5, 80(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 48(s1) +; LP64E-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 52(s1) +; LP64E-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 56(s1) +; LP64E-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 60(s1) +; LP64E-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 64(s1) +; LP64E-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 68(s1) +; LP64E-NEXT: fsw fa5, 56(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 72(s1) +; LP64E-NEXT: fsw fa5, 52(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 76(s1) +; LP64E-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 80(s1) +; LP64E-NEXT: fsw fa5, 44(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 84(s1) +; LP64E-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 88(s1) +; LP64E-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 92(s1) +; LP64E-NEXT: fsw fa5, 32(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 96(s1) +; LP64E-NEXT: fsw fa5, 28(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 100(s1) +; LP64E-NEXT: fsw fa5, 24(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 104(s1) +; LP64E-NEXT: fsw fa5, 20(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 108(s1) +; LP64E-NEXT: fsw fa5, 16(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 112(s1) +; LP64E-NEXT: fsw fa5, 12(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 116(s1) 
+; LP64E-NEXT: fsw fa5, 8(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 120(s1) +; LP64E-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill +; LP64E-NEXT: flw fa5, 124(s1) +; LP64E-NEXT: fsw fa5, 0(sp) # 4-byte Folded Spill +; LP64E-NEXT: call callee@plt +; LP64E-NEXT: flw fa5, 0(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 124(s1) +; LP64E-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 120(s1) +; LP64E-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 116(s1) +; LP64E-NEXT: flw fa5, 12(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 112(s1) +; LP64E-NEXT: flw fa5, 16(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 108(s1) +; LP64E-NEXT: flw fa5, 20(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 104(s1) +; LP64E-NEXT: flw fa5, 24(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 100(s1) +; LP64E-NEXT: flw fa5, 28(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 96(s1) +; LP64E-NEXT: flw fa5, 32(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 92(s1) +; LP64E-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 88(s1) +; LP64E-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 84(s1) +; LP64E-NEXT: flw fa5, 44(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 80(s1) +; LP64E-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 76(s1) +; LP64E-NEXT: flw fa5, 52(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 72(s1) +; LP64E-NEXT: flw fa5, 56(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 68(s1) +; LP64E-NEXT: flw fa5, 60(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 64(s1) +; LP64E-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 60(s1) +; LP64E-NEXT: flw fa5, 68(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 56(s1) +; LP64E-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 52(s1) +; LP64E-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 48(s1) +; LP64E-NEXT: flw fa5, 80(sp) # 4-byte 
Folded Reload +; LP64E-NEXT: fsw fa5, 44(s1) +; LP64E-NEXT: flw fa5, 84(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 40(s1) +; LP64E-NEXT: flw fa5, 88(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 36(s1) +; LP64E-NEXT: flw fa5, 92(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 32(s1) +; LP64E-NEXT: flw fa5, 96(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 28(s1) +; LP64E-NEXT: flw fa5, 100(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 24(s1) +; LP64E-NEXT: flw fa5, 104(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 20(s1) +; LP64E-NEXT: flw fa5, 108(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, 16(s1) +; LP64E-NEXT: flw fa5, 112(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+12)(s0) +; LP64E-NEXT: flw fa5, 116(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+8)(s0) +; LP64E-NEXT: flw fa5, 120(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var+4)(s0) +; LP64E-NEXT: flw fa5, 124(sp) # 4-byte Folded Reload +; LP64E-NEXT: fsw fa5, %lo(var)(s0) +; LP64E-NEXT: ld ra, 144(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s0, 136(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s1, 128(sp) # 8-byte Folded Reload +; LP64E-NEXT: addi sp, sp, 152 +; LP64E-NEXT: ret +; ; ILP32F-LABEL: caller: ; ILP32F: # %bb.0: ; ILP32F-NEXT: addi sp, sp, -144 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck %s -check-prefix=ILP32 ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs< %s \ ; RUN: | FileCheck %s -check-prefix=LP64 +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64e -verify-machineinstrs< %s \ +; RUN: | FileCheck %s -check-prefix=LP64E ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=ILP32D ; RUN: llc -mtriple=riscv64 -mattr=+d 
-target-abi lp64d -verify-machineinstrs < %s \ @@ -10,7 +12,7 @@ @var = global [32 x double] zeroinitializer -; All floating point registers are temporaries for the ilp32 and lp64 ABIs. +; All floating point registers are temporaries for the ilp32, lp64e and lp64 ABIs. ; fs0-fs11 are callee-saved for the ilp32f, ilp32d, lp64f, and lp64d ABIs. ; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns @@ -157,6 +159,76 @@ ; LP64-NEXT: fsd fa5, %lo(var)(a0) ; LP64-NEXT: ret ; +; LP64E-LABEL: callee: +; LP64E: # %bb.0: +; LP64E-NEXT: lui a0, %hi(var) +; LP64E-NEXT: fld fa5, %lo(var)(a0) +; LP64E-NEXT: fld fa4, %lo(var+8)(a0) +; LP64E-NEXT: addi a1, a0, %lo(var) +; LP64E-NEXT: fld fa3, 16(a1) +; LP64E-NEXT: fld fa2, 24(a1) +; LP64E-NEXT: fld fa1, 32(a1) +; LP64E-NEXT: fld fa0, 40(a1) +; LP64E-NEXT: fld ft0, 48(a1) +; LP64E-NEXT: fld ft1, 56(a1) +; LP64E-NEXT: fld ft2, 64(a1) +; LP64E-NEXT: fld ft3, 72(a1) +; LP64E-NEXT: fld ft4, 80(a1) +; LP64E-NEXT: fld ft5, 88(a1) +; LP64E-NEXT: fld ft6, 96(a1) +; LP64E-NEXT: fld ft7, 104(a1) +; LP64E-NEXT: fld fa6, 112(a1) +; LP64E-NEXT: fld fa7, 120(a1) +; LP64E-NEXT: fld ft8, 128(a1) +; LP64E-NEXT: fld ft9, 136(a1) +; LP64E-NEXT: fld ft10, 144(a1) +; LP64E-NEXT: fld ft11, 152(a1) +; LP64E-NEXT: fld fs0, 160(a1) +; LP64E-NEXT: fld fs1, 168(a1) +; LP64E-NEXT: fld fs2, 176(a1) +; LP64E-NEXT: fld fs3, 184(a1) +; LP64E-NEXT: fld fs4, 192(a1) +; LP64E-NEXT: fld fs5, 200(a1) +; LP64E-NEXT: fld fs6, 208(a1) +; LP64E-NEXT: fld fs7, 216(a1) +; LP64E-NEXT: fld fs8, 248(a1) +; LP64E-NEXT: fld fs9, 240(a1) +; LP64E-NEXT: fld fs10, 232(a1) +; LP64E-NEXT: fld fs11, 224(a1) +; LP64E-NEXT: fsd fs8, 248(a1) +; LP64E-NEXT: fsd fs9, 240(a1) +; LP64E-NEXT: fsd fs10, 232(a1) +; LP64E-NEXT: fsd fs11, 224(a1) +; LP64E-NEXT: fsd fs7, 216(a1) +; LP64E-NEXT: fsd fs6, 208(a1) +; LP64E-NEXT: fsd fs5, 200(a1) +; LP64E-NEXT: fsd fs4, 192(a1) +; LP64E-NEXT: fsd fs3, 184(a1) +; LP64E-NEXT: fsd fs2, 176(a1) +; LP64E-NEXT: fsd fs1, 168(a1) +; 
LP64E-NEXT: fsd fs0, 160(a1) +; LP64E-NEXT: fsd ft11, 152(a1) +; LP64E-NEXT: fsd ft10, 144(a1) +; LP64E-NEXT: fsd ft9, 136(a1) +; LP64E-NEXT: fsd ft8, 128(a1) +; LP64E-NEXT: fsd fa7, 120(a1) +; LP64E-NEXT: fsd fa6, 112(a1) +; LP64E-NEXT: fsd ft7, 104(a1) +; LP64E-NEXT: fsd ft6, 96(a1) +; LP64E-NEXT: fsd ft5, 88(a1) +; LP64E-NEXT: fsd ft4, 80(a1) +; LP64E-NEXT: fsd ft3, 72(a1) +; LP64E-NEXT: fsd ft2, 64(a1) +; LP64E-NEXT: fsd ft1, 56(a1) +; LP64E-NEXT: fsd ft0, 48(a1) +; LP64E-NEXT: fsd fa0, 40(a1) +; LP64E-NEXT: fsd fa1, 32(a1) +; LP64E-NEXT: fsd fa2, 24(a1) +; LP64E-NEXT: fsd fa3, 16(a1) +; LP64E-NEXT: fsd fa4, %lo(var+8)(a0) +; LP64E-NEXT: fsd fa5, %lo(var)(a0) +; LP64E-NEXT: ret +; ; ILP32D-LABEL: callee: ; ILP32D: # %bb.0: ; ILP32D-NEXT: addi sp, sp, -96 @@ -647,6 +719,149 @@ ; LP64-NEXT: addi sp, sp, 288 ; LP64-NEXT: ret ; +; LP64E-LABEL: caller: +; LP64E: # %bb.0: +; LP64E-NEXT: addi sp, sp, -280 +; LP64E-NEXT: sd ra, 272(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s0, 264(sp) # 8-byte Folded Spill +; LP64E-NEXT: sd s1, 256(sp) # 8-byte Folded Spill +; LP64E-NEXT: lui s0, %hi(var) +; LP64E-NEXT: fld fa5, %lo(var)(s0) +; LP64E-NEXT: fsd fa5, 248(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, %lo(var+8)(s0) +; LP64E-NEXT: fsd fa5, 240(sp) # 8-byte Folded Spill +; LP64E-NEXT: addi s1, s0, %lo(var) +; LP64E-NEXT: fld fa5, 16(s1) +; LP64E-NEXT: fsd fa5, 232(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 24(s1) +; LP64E-NEXT: fsd fa5, 224(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 32(s1) +; LP64E-NEXT: fsd fa5, 216(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 40(s1) +; LP64E-NEXT: fsd fa5, 208(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 48(s1) +; LP64E-NEXT: fsd fa5, 200(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 56(s1) +; LP64E-NEXT: fsd fa5, 192(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 64(s1) +; LP64E-NEXT: fsd fa5, 184(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 72(s1) +; LP64E-NEXT: fsd fa5, 176(sp) # 8-byte Folded 
Spill +; LP64E-NEXT: fld fa5, 80(s1) +; LP64E-NEXT: fsd fa5, 168(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 88(s1) +; LP64E-NEXT: fsd fa5, 160(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 96(s1) +; LP64E-NEXT: fsd fa5, 152(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 104(s1) +; LP64E-NEXT: fsd fa5, 144(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 112(s1) +; LP64E-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 120(s1) +; LP64E-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 128(s1) +; LP64E-NEXT: fsd fa5, 120(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 136(s1) +; LP64E-NEXT: fsd fa5, 112(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 144(s1) +; LP64E-NEXT: fsd fa5, 104(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 152(s1) +; LP64E-NEXT: fsd fa5, 96(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 160(s1) +; LP64E-NEXT: fsd fa5, 88(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 168(s1) +; LP64E-NEXT: fsd fa5, 80(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 176(s1) +; LP64E-NEXT: fsd fa5, 72(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 184(s1) +; LP64E-NEXT: fsd fa5, 64(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 192(s1) +; LP64E-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 200(s1) +; LP64E-NEXT: fsd fa5, 48(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 208(s1) +; LP64E-NEXT: fsd fa5, 40(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 216(s1) +; LP64E-NEXT: fsd fa5, 32(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 224(s1) +; LP64E-NEXT: fsd fa5, 24(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 232(s1) +; LP64E-NEXT: fsd fa5, 16(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 240(s1) +; LP64E-NEXT: fsd fa5, 8(sp) # 8-byte Folded Spill +; LP64E-NEXT: fld fa5, 248(s1) +; LP64E-NEXT: fsd fa5, 0(sp) # 8-byte Folded Spill +; LP64E-NEXT: call callee@plt +; LP64E-NEXT: fld fa5, 0(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 248(s1) +; 
LP64E-NEXT: fld fa5, 8(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 240(s1) +; LP64E-NEXT: fld fa5, 16(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 232(s1) +; LP64E-NEXT: fld fa5, 24(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 224(s1) +; LP64E-NEXT: fld fa5, 32(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 216(s1) +; LP64E-NEXT: fld fa5, 40(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 208(s1) +; LP64E-NEXT: fld fa5, 48(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 200(s1) +; LP64E-NEXT: fld fa5, 56(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 192(s1) +; LP64E-NEXT: fld fa5, 64(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 184(s1) +; LP64E-NEXT: fld fa5, 72(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 176(s1) +; LP64E-NEXT: fld fa5, 80(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 168(s1) +; LP64E-NEXT: fld fa5, 88(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 160(s1) +; LP64E-NEXT: fld fa5, 96(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 152(s1) +; LP64E-NEXT: fld fa5, 104(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 144(s1) +; LP64E-NEXT: fld fa5, 112(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 136(s1) +; LP64E-NEXT: fld fa5, 120(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 128(s1) +; LP64E-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 120(s1) +; LP64E-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 112(s1) +; LP64E-NEXT: fld fa5, 144(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 104(s1) +; LP64E-NEXT: fld fa5, 152(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 96(s1) +; LP64E-NEXT: fld fa5, 160(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 88(s1) +; LP64E-NEXT: fld fa5, 168(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 80(s1) +; LP64E-NEXT: fld fa5, 176(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 72(s1) +; LP64E-NEXT: fld fa5, 184(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 64(s1) +; LP64E-NEXT: fld 
fa5, 192(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 56(s1) +; LP64E-NEXT: fld fa5, 200(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 48(s1) +; LP64E-NEXT: fld fa5, 208(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 40(s1) +; LP64E-NEXT: fld fa5, 216(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 32(s1) +; LP64E-NEXT: fld fa5, 224(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 24(s1) +; LP64E-NEXT: fld fa5, 232(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, 16(s1) +; LP64E-NEXT: fld fa5, 240(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, %lo(var+8)(s0) +; LP64E-NEXT: fld fa5, 248(sp) # 8-byte Folded Reload +; LP64E-NEXT: fsd fa5, %lo(var)(s0) +; LP64E-NEXT: ld ra, 272(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s0, 264(sp) # 8-byte Folded Reload +; LP64E-NEXT: ld s1, 256(sp) # 8-byte Folded Reload +; LP64E-NEXT: addi sp, sp, 280 +; LP64E-NEXT: ret +; ; ILP32D-LABEL: caller: ; ILP32D: # %bb.0: ; ILP32D-NEXT: addi sp, sp, -272 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32f -verify-machineinstrs < %s \ @@ -15,6 +17,8 @@ ; RUN: -frame-pointer=all < %s | FileCheck %s -check-prefixes=RV32IZCMP-WITH-FP ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | 
FileCheck %s -check-prefix=RV64I-LP64E ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f -verify-machineinstrs < %s \ @@ -144,6 +148,96 @@ ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: callee: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -48 +; RV32I-ILP32E-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a7, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(a5) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(a5) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw t0, 24(a5) +; RV32I-ILP32E-NEXT: lw t1, 28(a5) +; RV32I-ILP32E-NEXT: lw t2, 32(a5) +; RV32I-ILP32E-NEXT: lw t3, 36(a5) +; RV32I-ILP32E-NEXT: lw t4, 40(a5) +; RV32I-ILP32E-NEXT: lw t5, 44(a5) +; RV32I-ILP32E-NEXT: lw t6, 48(a5) +; RV32I-ILP32E-NEXT: lw s2, 52(a5) +; RV32I-ILP32E-NEXT: lw s3, 56(a5) +; RV32I-ILP32E-NEXT: lw s4, 60(a5) +; RV32I-ILP32E-NEXT: lw s5, 64(a5) +; RV32I-ILP32E-NEXT: lw s6, 68(a5) +; RV32I-ILP32E-NEXT: lw s7, 72(a5) +; RV32I-ILP32E-NEXT: lw s8, 76(a5) +; RV32I-ILP32E-NEXT: lw s9, 80(a5) +; RV32I-ILP32E-NEXT: lw s10, 84(a5) +; RV32I-ILP32E-NEXT: lw s11, 88(a5) +; RV32I-ILP32E-NEXT: lw s0, 92(a5) +; RV32I-ILP32E-NEXT: lw s1, 96(a5) +; RV32I-ILP32E-NEXT: lw ra, 100(a5) +; 
RV32I-ILP32E-NEXT: lw a6, 104(a5) +; RV32I-ILP32E-NEXT: lw a4, 108(a5) +; RV32I-ILP32E-NEXT: lw a0, 124(a5) +; RV32I-ILP32E-NEXT: lw a1, 120(a5) +; RV32I-ILP32E-NEXT: lw a2, 116(a5) +; RV32I-ILP32E-NEXT: lw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a0, 124(a5) +; RV32I-ILP32E-NEXT: sw a1, 120(a5) +; RV32I-ILP32E-NEXT: sw a2, 116(a5) +; RV32I-ILP32E-NEXT: sw a3, 112(a5) +; RV32I-ILP32E-NEXT: sw a4, 108(a5) +; RV32I-ILP32E-NEXT: sw a6, 104(a5) +; RV32I-ILP32E-NEXT: sw ra, 100(a5) +; RV32I-ILP32E-NEXT: sw s1, 96(a5) +; RV32I-ILP32E-NEXT: sw s0, 92(a5) +; RV32I-ILP32E-NEXT: sw s11, 88(a5) +; RV32I-ILP32E-NEXT: sw s10, 84(a5) +; RV32I-ILP32E-NEXT: sw s9, 80(a5) +; RV32I-ILP32E-NEXT: sw s8, 76(a5) +; RV32I-ILP32E-NEXT: sw s7, 72(a5) +; RV32I-ILP32E-NEXT: sw s6, 68(a5) +; RV32I-ILP32E-NEXT: sw s5, 64(a5) +; RV32I-ILP32E-NEXT: sw s4, 60(a5) +; RV32I-ILP32E-NEXT: sw s3, 56(a5) +; RV32I-ILP32E-NEXT: sw s2, 52(a5) +; RV32I-ILP32E-NEXT: sw t6, 48(a5) +; RV32I-ILP32E-NEXT: sw t5, 44(a5) +; RV32I-ILP32E-NEXT: sw t4, 40(a5) +; RV32I-ILP32E-NEXT: sw t3, 36(a5) +; RV32I-ILP32E-NEXT: sw t2, 32(a5) +; RV32I-ILP32E-NEXT: sw t1, 28(a5) +; RV32I-ILP32E-NEXT: sw t0, 24(a5) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(a5) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(a5) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 48 +; 
RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: callee: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -80 @@ -563,6 +657,96 @@ ; RV64I-NEXT: addi sp, sp, 160 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: callee: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -80 +; RV64I-LP64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lui a7, %hi(var) +; RV64I-LP64E-NEXT: lw a0, %lo(var)(a7) +; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a7) +; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a7) +; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a7) +; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: addi a5, a7, %lo(var) +; RV64I-LP64E-NEXT: lw a0, 16(a5) +; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 20(a5) +; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw t0, 24(a5) +; RV64I-LP64E-NEXT: lw t1, 28(a5) +; RV64I-LP64E-NEXT: lw t2, 32(a5) +; RV64I-LP64E-NEXT: lw t3, 36(a5) +; RV64I-LP64E-NEXT: lw t4, 40(a5) +; RV64I-LP64E-NEXT: lw t5, 44(a5) +; RV64I-LP64E-NEXT: lw t6, 48(a5) +; RV64I-LP64E-NEXT: lw s2, 52(a5) +; RV64I-LP64E-NEXT: lw s3, 56(a5) +; RV64I-LP64E-NEXT: lw s4, 60(a5) +; RV64I-LP64E-NEXT: lw s5, 64(a5) +; RV64I-LP64E-NEXT: lw s6, 68(a5) +; RV64I-LP64E-NEXT: lw s7, 72(a5) +; RV64I-LP64E-NEXT: lw s8, 76(a5) +; RV64I-LP64E-NEXT: lw s9, 80(a5) +; RV64I-LP64E-NEXT: lw s10, 84(a5) +; RV64I-LP64E-NEXT: lw s11, 88(a5) +; RV64I-LP64E-NEXT: lw s0, 92(a5) +; RV64I-LP64E-NEXT: lw s1, 96(a5) +; RV64I-LP64E-NEXT: lw ra, 100(a5) +; RV64I-LP64E-NEXT: lw a6, 104(a5) +; RV64I-LP64E-NEXT: lw a4, 108(a5) +; RV64I-LP64E-NEXT: lw a0, 124(a5) +; RV64I-LP64E-NEXT: lw a1, 120(a5) +; RV64I-LP64E-NEXT: lw a2, 
116(a5) +; RV64I-LP64E-NEXT: lw a3, 112(a5) +; RV64I-LP64E-NEXT: sw a0, 124(a5) +; RV64I-LP64E-NEXT: sw a1, 120(a5) +; RV64I-LP64E-NEXT: sw a2, 116(a5) +; RV64I-LP64E-NEXT: sw a3, 112(a5) +; RV64I-LP64E-NEXT: sw a4, 108(a5) +; RV64I-LP64E-NEXT: sw a6, 104(a5) +; RV64I-LP64E-NEXT: sw ra, 100(a5) +; RV64I-LP64E-NEXT: sw s1, 96(a5) +; RV64I-LP64E-NEXT: sw s0, 92(a5) +; RV64I-LP64E-NEXT: sw s11, 88(a5) +; RV64I-LP64E-NEXT: sw s10, 84(a5) +; RV64I-LP64E-NEXT: sw s9, 80(a5) +; RV64I-LP64E-NEXT: sw s8, 76(a5) +; RV64I-LP64E-NEXT: sw s7, 72(a5) +; RV64I-LP64E-NEXT: sw s6, 68(a5) +; RV64I-LP64E-NEXT: sw s5, 64(a5) +; RV64I-LP64E-NEXT: sw s4, 60(a5) +; RV64I-LP64E-NEXT: sw s3, 56(a5) +; RV64I-LP64E-NEXT: sw s2, 52(a5) +; RV64I-LP64E-NEXT: sw t6, 48(a5) +; RV64I-LP64E-NEXT: sw t5, 44(a5) +; RV64I-LP64E-NEXT: sw t4, 40(a5) +; RV64I-LP64E-NEXT: sw t3, 36(a5) +; RV64I-LP64E-NEXT: sw t2, 32(a5) +; RV64I-LP64E-NEXT: sw t1, 28(a5) +; RV64I-LP64E-NEXT: sw t0, 24(a5) +; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 20(a5) +; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 16(a5) +; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a7) +; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a7) +; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a7) +; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a7) +; RV64I-LP64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 80 +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: callee: ; RV64I-WITH-FP: # %bb.0: ; RV64I-WITH-FP-NEXT: addi sp, sp, -160 @@ -1023,6 +1207,148 @@ ; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; 
RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -136 +; RV32I-ILP32E-NEXT: sw ra, 132(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 128(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lui a0, %hi(var) +; RV32I-ILP32E-NEXT: lw a1, %lo(var)(a0) +; RV32I-ILP32E-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+4)(a0) +; RV32I-ILP32E-NEXT: sw a1, 116(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+8)(a0) +; RV32I-ILP32E-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a1, %lo(var+12)(a0) +; RV32I-ILP32E-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: addi s1, a0, %lo(var) +; RV32I-ILP32E-NEXT: lw a0, 16(s1) +; RV32I-ILP32E-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 20(s1) +; RV32I-ILP32E-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 24(s1) +; RV32I-ILP32E-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 28(s1) +; RV32I-ILP32E-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 32(s1) +; RV32I-ILP32E-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 36(s1) +; RV32I-ILP32E-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 40(s1) +; RV32I-ILP32E-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 44(s1) +; RV32I-ILP32E-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 48(s1) +; RV32I-ILP32E-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 52(s1) +; RV32I-ILP32E-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 56(s1) +; RV32I-ILP32E-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 60(s1) +; RV32I-ILP32E-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 64(s1) +; RV32I-ILP32E-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 68(s1) +; 
RV32I-ILP32E-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 72(s1) +; RV32I-ILP32E-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 76(s1) +; RV32I-ILP32E-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 80(s1) +; RV32I-ILP32E-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 84(s1) +; RV32I-ILP32E-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 88(s1) +; RV32I-ILP32E-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 92(s1) +; RV32I-ILP32E-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 96(s1) +; RV32I-ILP32E-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 100(s1) +; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 104(s1) +; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 108(s1) +; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 112(s1) +; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 116(s1) +; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw a0, 120(s1) +; RV32I-ILP32E-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: lw s0, 124(s1) +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: sw s0, 124(s1) +; RV32I-ILP32E-NEXT: lw a0, 0(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 120(s1) +; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 116(s1) +; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 112(s1) +; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 108(s1) +; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 104(s1) +; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 100(s1) +; RV32I-ILP32E-NEXT: lw a0, 
24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 96(s1) +; RV32I-ILP32E-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 92(s1) +; RV32I-ILP32E-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 88(s1) +; RV32I-ILP32E-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 84(s1) +; RV32I-ILP32E-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 80(s1) +; RV32I-ILP32E-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 76(s1) +; RV32I-ILP32E-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 72(s1) +; RV32I-ILP32E-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 68(s1) +; RV32I-ILP32E-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 64(s1) +; RV32I-ILP32E-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 60(s1) +; RV32I-ILP32E-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 56(s1) +; RV32I-ILP32E-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 52(s1) +; RV32I-ILP32E-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 48(s1) +; RV32I-ILP32E-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 44(s1) +; RV32I-ILP32E-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 40(s1) +; RV32I-ILP32E-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 36(s1) +; RV32I-ILP32E-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 32(s1) +; RV32I-ILP32E-NEXT: lw a0, 92(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 28(s1) +; RV32I-ILP32E-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 24(s1) +; RV32I-ILP32E-NEXT: lw a0, 100(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 20(s1) +; RV32I-ILP32E-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, 16(s1) +; RV32I-ILP32E-NEXT: lui a1, 
%hi(var) +; RV32I-ILP32E-NEXT: lw a0, 108(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a1) +; RV32I-ILP32E-NEXT: lw a0, 112(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a1) +; RV32I-ILP32E-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a1) +; RV32I-ILP32E-NEXT: lw a0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a1) +; RV32I-ILP32E-NEXT: lw ra, 132(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 128(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 136 +; RV32I-ILP32E-NEXT: ret +; ; RV32I-WITH-FP-LABEL: caller: ; RV32I-WITH-FP: # %bb.0: ; RV32I-WITH-FP-NEXT: addi sp, sp, -144 @@ -1576,6 +1902,148 @@ ; RV64I-NEXT: addi sp, sp, 288 ; RV64I-NEXT: ret ; +; RV64I-LP64E-LABEL: caller: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -272 +; RV64I-LP64E-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 248(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lui a0, %hi(var) +; RV64I-LP64E-NEXT: lw a1, %lo(var)(a0) +; RV64I-LP64E-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+4)(a0) +; RV64I-LP64E-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+8)(a0) +; RV64I-LP64E-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a1, %lo(var+12)(a0) +; RV64I-LP64E-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: addi s1, a0, %lo(var) +; RV64I-LP64E-NEXT: lw a0, 16(s1) +; RV64I-LP64E-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 20(s1) +; RV64I-LP64E-NEXT: sd a0, 200(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 24(s1) +; RV64I-LP64E-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 28(s1) +; RV64I-LP64E-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 
32(s1) +; RV64I-LP64E-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 36(s1) +; RV64I-LP64E-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 40(s1) +; RV64I-LP64E-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 44(s1) +; RV64I-LP64E-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 48(s1) +; RV64I-LP64E-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 52(s1) +; RV64I-LP64E-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 56(s1) +; RV64I-LP64E-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 60(s1) +; RV64I-LP64E-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 64(s1) +; RV64I-LP64E-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 68(s1) +; RV64I-LP64E-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 72(s1) +; RV64I-LP64E-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 76(s1) +; RV64I-LP64E-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 80(s1) +; RV64I-LP64E-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 84(s1) +; RV64I-LP64E-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 88(s1) +; RV64I-LP64E-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 92(s1) +; RV64I-LP64E-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 96(s1) +; RV64I-LP64E-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 100(s1) +; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 104(s1) +; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 108(s1) +; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 112(s1) +; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw a0, 116(s1) +; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte 
Folded Spill +; RV64I-LP64E-NEXT: lw a0, 120(s1) +; RV64I-LP64E-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: lw s0, 124(s1) +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: sw s0, 124(s1) +; RV64I-LP64E-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 120(s1) +; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 116(s1) +; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 112(s1) +; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 108(s1) +; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 104(s1) +; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 100(s1) +; RV64I-LP64E-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 96(s1) +; RV64I-LP64E-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 92(s1) +; RV64I-LP64E-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 88(s1) +; RV64I-LP64E-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 84(s1) +; RV64I-LP64E-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 80(s1) +; RV64I-LP64E-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 76(s1) +; RV64I-LP64E-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 72(s1) +; RV64I-LP64E-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 68(s1) +; RV64I-LP64E-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 64(s1) +; RV64I-LP64E-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 60(s1) +; RV64I-LP64E-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 56(s1) +; RV64I-LP64E-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 52(s1) +; RV64I-LP64E-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 48(s1) +; 
RV64I-LP64E-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 44(s1) +; RV64I-LP64E-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 40(s1) +; RV64I-LP64E-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 36(s1) +; RV64I-LP64E-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 32(s1) +; RV64I-LP64E-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 28(s1) +; RV64I-LP64E-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 24(s1) +; RV64I-LP64E-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 20(s1) +; RV64I-LP64E-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, 16(s1) +; RV64I-LP64E-NEXT: lui a1, %hi(var) +; RV64I-LP64E-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a1) +; RV64I-LP64E-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a1) +; RV64I-LP64E-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a1) +; RV64I-LP64E-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a1) +; RV64I-LP64E-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 248(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 272 +; RV64I-LP64E-NEXT: ret +; ; RV64I-WITH-FP-LABEL: caller: ; RV64I-WITH-FP: # %bb.0: ; RV64I-WITH-FP-NEXT: addi sp, sp, -288 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -0,0 +1,2556 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi 
ilp32e -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM-SAVE-RESTORE %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -mattr=+save-restore -frame-pointer=all \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP-SAVE-RESTORE %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-4 and fp-8. + +; Any tests that would have identical output for some combination of the ilp32* +; ABIs belong in calling-conv-*-common.ll. This file contains tests that will +; have different output across those ABIs. i.e. where some arguments would be +; passed according to the floating point ABI, or where the stack is aligned to +; a different boundary. + +define i32 @callee_float_in_regs(i32 %a, float %b) { +; ILP32E-FPELIM-LABEL: callee_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: call __fixsfsi@plt +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded 
Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: call __fixsfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixsfsi@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixsfsi@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2 + %b_fptosi = fptosi float %b to i32 + %1 = add i32 %a, 
%b_fptosi + ret i32 %1 +} + +define i32 @caller_float_in_regs() { +; ILP32E-FPELIM-LABEL: caller_float_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a1, 262144 +; ILP32E-FPELIM-NEXT: call callee_float_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a1, 262144 +; ILP32E-WITHFP-NEXT: call callee_float_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 262144 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_in_regs@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 262144 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_in_regs@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_float_in_regs(i32 1, float 2.0) + ret i32 %1 +} + +define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) { +; ILP32E-FPELIM-LABEL: callee_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-NEXT: lw a1, 0(sp) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_float_on_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_float_on_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = trunc i64 %d to i32 + %2 = bitcast float %e to i32 + %3 = add i32 %1, %2 + ret i32 %3 +} + +define i32 @caller_float_on_stack() { +; ILP32E-FPELIM-LABEL: caller_float_on_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: lui a0, 264704 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a2, 2 +; ILP32E-FPELIM-NEXT: li a4, 3 +; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: li a3, 0 +; ILP32E-FPELIM-NEXT: li a5, 0 +; ILP32E-FPELIM-NEXT: call callee_float_on_stack@plt +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_float_on_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -20 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 264704 +; ILP32E-WITHFP-NEXT: sw 
a0, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a2, 2 +; ILP32E-WITHFP-NEXT: li a4, 3 +; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: li a3, 0 +; ILP32E-WITHFP-NEXT: li a5, 0 +; ILP32E-WITHFP-NEXT: call callee_float_on_stack@plt +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 20 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_float_on_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 264704 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_float_on_stack@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_float_on_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi 
s0, sp, 20 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 264704 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_float_on_stack@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) + ret i32 %1 +} + +define float @callee_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 260096 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a0, 260096 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 260096 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_tiny_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # 
%bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 260096 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret float 1.0 +} + +define i32 @caller_tiny_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret@plt +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_tiny_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_tiny_scalar_ret@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + ret i32 %2 +} + +; Check that on RV32 ilp32e, double is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. + +define i32 @callee_double_in_regs(i32 %a, double %b) { +; ILP32E-FPELIM-LABEL: callee_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -8 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: mv s0, a0 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: mv a1, a2 +; ILP32E-FPELIM-NEXT: call __fixdfsi@plt +; ILP32E-FPELIM-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 8 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: 
.cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: mv a1, a2 +; ILP32E-WITHFP-NEXT: call __fixdfsi@plt +; ILP32E-WITHFP-NEXT: add a0, s1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_double_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __fixdfsi@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, s0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_double_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv s1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __fixdfsi@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, s1, a0 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_2 + %b_fptosi = fptosi double %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_regs() { +; ILP32E-FPELIM-LABEL: caller_double_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: lui a2, 262144 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call callee_double_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_double_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call callee_double_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_double_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a2, 262144 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_double_in_regs@plt +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_double_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a2, 262144 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_double_in_regs@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_double_in_regs(i32 1, double 2.0) + ret i32 %1 +} + +; Check 2x*xlen values are aligned appropriately when passed on the stack +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %f, i32 %g, i32 %h, double %i, i32 %j, [2 x i32] %k) { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: callee_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw 
s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 0(a2) +; ILP32E-WITHFP-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_aligned_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_aligned_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2) +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = bitcast fp128 %c to i128 + %2 = trunc i128 %1 to i32 + %3 = add i32 %2, %g + %4 = add i32 %3, %h + %5 = bitcast double %i to i64 + %6 = trunc i64 %5 to i32 + %7 = add i32 %4, %6 + %8 = add i32 %7, %j + %9 = extractvalue [2 x i32] %k, 0 + %10 = add i32 %8, %9 + ret i32 %10 +} + +define void @caller_aligned_stack() { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; ILP32E-FPELIM-LABEL: caller_aligned_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: li a0, 18 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: li a0, 16 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 15 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; 
ILP32E-FPELIM-NEXT: li a0, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-NEXT: li a3, 12 +; ILP32E-FPELIM-NEXT: li a4, 13 +; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: call callee_aligned_stack@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_aligned_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 18 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: 
addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 13 +; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: call callee_aligned_stack@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_aligned_stack: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 17 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 24(sp) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262236 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 377487 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 15 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 14 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 688509 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_aligned_stack: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 17 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262236 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 377487 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 15 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 14 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 688509 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a0, -2048 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_aligned_stack(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, + i64 20000000000, i32 14, i32 15, double 2.720000e+00, i32 16, + [2 x i32] [i32 17, i32 18]) + ret void +} + +define double @callee_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 261888 +; ILP32E-FPELIM-NEXT: li a0, 0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 261888 +; ILP32E-WITHFP-NEXT: li a0, 0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 261888 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # 
%bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 261888 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret double 1.0 +} + +define i64 @caller_small_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_scalar_ret@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_scalar_ret@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call 
callee_small_scalar_ret@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_scalar_ret@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call double @callee_small_scalar_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +; Check that on RV32, i64 is passed in a pair of registers. Unlike +; the convention for varargs, this need not be an aligned pair. + +define i32 @callee_i64_in_regs(i32 %a, i64 %b) { +; ILP32E-FPELIM-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_i64_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_i64_in_regs: +; 
ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %b_trunc = trunc i64 %b to i32 + %1 = add i32 %a, %b_trunc + ret i32 %1 +} + +define i32 @caller_i64_in_regs() { +; ILP32E-FPELIM-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 0 +; ILP32E-FPELIM-NEXT: call callee_i64_in_regs@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call callee_i64_in_regs@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_i64_in_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_i64_in_regs@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_i64_in_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_i64_in_regs@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_i64_in_regs(i32 1, i64 2) + ret i32 %1 +} + +; Check that the stack is used once the GPRs are exhausted + +define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) { +; ILP32E-FPELIM-LABEL: callee_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a6, 12(sp) +; ILP32E-FPELIM-NEXT: lw a7, 0(sp) +; ILP32E-FPELIM-NEXT: lw t0, 4(sp) +; ILP32E-FPELIM-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-NEXT: andi a0, a0, 255 +; ILP32E-FPELIM-NEXT: slli a1, a1, 16 +; ILP32E-FPELIM-NEXT: srli a1, a1, 16 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a4, t1 +; ILP32E-FPELIM-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a7 +; 
ILP32E-FPELIM-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a6, 12(s0) +; ILP32E-WITHFP-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-NEXT: lw t0, 4(s0) +; ILP32E-WITHFP-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-NEXT: andi a0, a0, 255 +; ILP32E-WITHFP-NEXT: slli a1, a1, 16 +; ILP32E-WITHFP-NEXT: srli a1, a1, 16 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_many_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi a0, a0, 255 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: slli a1, a1, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: srli a1, a1, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, t1 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_many_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t0, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi a0, a0, 255 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: slli a1, a1, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: srli a1, a1, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, t1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %a_ext = zext i8 %a to i32 + %b_ext = zext i16 %b to i32 + %1 = add i32 %a_ext, %b_ext + %2 = add i32 %1, %c + %3 = icmp eq i64 %d, %g + %4 = zext i1 %3 to i32 + %5 = add i32 %4, %2 + %6 = add i32 %5, %e + %7 = add i32 %6, %f + %8 = add i32 %7, %h + ret 
i32 %8 +} + +define i32 @caller_many_scalars() { +; ILP32E-FPELIM-LABEL: caller_many_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -20 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 8 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a4, 6 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a5, 5 +; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: li a4, 0 +; ILP32E-FPELIM-NEXT: call callee_many_scalars@plt +; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 20 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_many_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 8 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a4, 6 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a5, 5 +; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: li a4, 0 +; ILP32E-WITHFP-NEXT: call callee_many_scalars@plt +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi 
sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_many_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_many_scalars@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_many_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8) + ret i32 %1 +} + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) { +; ILP32E-FPELIM-LABEL: callee_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a2, 0(a1) +; ILP32E-FPELIM-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-NEXT: lw a7, 4(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a2, 0(a1) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a6, 12(a0) +; 
ILP32E-WITHFP-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: 
lw a2, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() { +; ILP32E-FPELIM-LABEL: caller_large_scalars: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 48 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: li a2, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: mv a1, sp +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -48 +; 
ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-NEXT: li a2, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a1, sp +; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars@plt +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in memory + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) { +; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-NEXT: lw a3, 0(a1) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-NEXT: lw a7, 4(a1) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a1) 
+; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-NEXT: lw a7, 4(a1) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() { +; ILP32E-FPELIM-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a0, 9 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; 
ILP32E-FPELIM-NEXT: addi a0, sp, 40 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 7 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: li a6, 8 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: li a4, 5 +; ILP32E-FPELIM-NEXT: li a5, 6 +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a0, 9 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 40 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 7 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; 
ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: li a6, 8 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: li a4, 5 +; ILP32E-WITHFP-NEXT: li a5, 6 +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Ensure that libcalls generated in the middle-end obey the calling convention + +define i32 @caller_mixed_scalar_libcalls(i64 %a) { +; ILP32E-FPELIM-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a2, a1 +; ILP32E-FPELIM-NEXT: mv a1, a0 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call __floatditf@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; 
ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a2, a1 +; ILP32E-WITHFP-NEXT: mv a1, a0 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call __floatditf@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a2, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call __floatditf@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_mixed_scalar_libcalls: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: 
.cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a2, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call __floatditf@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = sitofp i64 %a to fp128 + %2 = bitcast fp128 %1 to i128 + %3 = trunc i128 %2 to i32 + ret i32 %3 +} + + +; Check passing of coerced integer arrays + +%struct.small = type { i32, i32* } + +define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) { +; ILP32E-FPELIM-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: seqz a0, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: seqz a0, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_coerced_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, 
a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_coerced_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = extractvalue [2 x i32] %a.coerce, 0 + %2 = extractvalue [2 x i32] %a.coerce, 1 + %3 = icmp eq i32 %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +define i32 @caller_small_coerced_struct() { +; ILP32E-FPELIM-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: call callee_small_coerced_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: call callee_small_coerced_struct@plt +; 
ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_small_coerced_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_coerced_struct@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_coerced_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_coerced_struct@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2]) + ret i32 %1 +} + +; Check large struct arguments, which are passed byval + +%struct.large = type { i32, i32, i32, i32 } + +define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a) { +; ILP32E-FPELIM-LABEL: callee_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; 
ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0 + %2 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 3 + %3 = load i32, i32* %1 + %4 = load i32, i32* %2 + %5 = add i32 %3, %4 + ret i32 %5 +} + +define i32 @caller_large_struct() { +; ILP32E-FPELIM-LABEL: caller_large_struct: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -36 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte 
Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct@plt +; ILP32E-FPELIM-NEXT: lw ra, 32(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 36 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-NEXT: sw ra, 36(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-NEXT: sw a2, -32(s0) +; ILP32E-WITHFP-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-NEXT: call callee_large_struct@plt +; ILP32E-WITHFP-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, 
__riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -40(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -36(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 
-32(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -28(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, s0, -40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %ls = alloca %struct.large, align 4 + %1 = bitcast %struct.large* %ls to i8* + %a = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 0 + store i32 1, i32* %a + %b = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 1 + store i32 2, i32* %b + %c = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 2 + store i32 3, i32* %c + %d = getelementptr inbounds %struct.large, %struct.large* %ls, i32 0, i32 3 + store i32 4, i32* %d + %2 = call i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %ls) + ret i32 %2 +} + +; Check return of 2x xlen structs + +define %struct.small @callee_small_struct_ret() { +; ILP32E-FPELIM-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_small_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_small_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret %struct.small { i32 1, i32* null } +} + +define i32 @caller_small_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_small_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: call callee_small_struct_ret@plt +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: call callee_small_struct_ret@plt +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: 
caller_small_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_small_struct_ret@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_0 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_small_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_small_struct_ret@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint i32* %3 to i32 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; Check return of >2x xlen scalars + +define fp128 @callee_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a1, 524272 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; 
ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: lui a1, 524272 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_large_scalar_ret() { +; ILP32E-FPELIM-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; 
ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_scalar_ret@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-WITHFP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 32 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_scalar_ret@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalar_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalar_ret@plt +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalar_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalar_ret@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = call fp128 @callee_large_scalar_ret() + ret void +} + +; Check return of >2x xlen structs + +define void @callee_large_struct_ret(%struct.large* noalias sret(%struct.large) %agg.result) { +; ILP32E-FPELIM-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: li a1, 1 +; ILP32E-FPELIM-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-NEXT: li a1, 2 +; ILP32E-FPELIM-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-NEXT: li a1, 3 +; ILP32E-FPELIM-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 16 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa 
s0, 0 +; ILP32E-WITHFP-NEXT: li a1, 1 +; ILP32E-WITHFP-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-NEXT: li a1, 2 +; ILP32E-WITHFP-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-NEXT: li a1, 3 +; ILP32E-WITHFP-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr 
inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 1 + store i32 2, i32* %b, align 4 + %c = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 2 + store i32 3, i32* %c, align 4 + %d = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 3 + store i32 4, i32* %d, align 4 + ret void +} + +define i32 @caller_large_struct_ret() { +; ILP32E-FPELIM-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-NEXT: mv a0, sp +; ILP32E-FPELIM-NEXT: call callee_large_struct_ret@plt +; ILP32E-FPELIM-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 24 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-NEXT: mv a0, sp +; ILP32E-WITHFP-NEXT: call callee_large_struct_ret@plt +; ILP32E-WITHFP-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; 
ILP32E-WITHFP-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 24 +; ILP32E-WITHFP-NEXT: ret +; +; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_struct_ret: +; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_struct_ret@plt +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 +; +; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_struct_ret: +; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_struct_ret@plt +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add 
a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 + %1 = alloca %struct.large + call void @callee_large_struct_ret(%struct.large* sret(%struct.large) %1) + %2 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 0 + %3 = load i32, i32* %2 + %4 = getelementptr inbounds %struct.large, %struct.large* %1, i32 0, i32 3 + %5 = load i32, i32* %4 + %6 = add i32 %3, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LP64E-FPELIM %s +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LP64E-WITHFP %s + +; As well as calling convention details, we check that ra and fp are +; consistently stored to fp-8 and fp-16. + +; Any tests that would have identical output for some combination of the lp64* +; ABIs belong in calling-conv-*-common.ll. This file contains tests that will +; have different output across those ABIs. i.e. where some arguments would be +; passed according to the floating point ABI. + +; TODO: softened float values can be passed anyext. 
+ +define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_float_in_regs: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: mv s0, a0 +; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a1 +; RV64I-LP64E-FPELIM-NEXT: call __fixsfdi@plt +; RV64I-LP64E-FPELIM-NEXT: add a0, s0, a0 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_float_in_regs: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -24 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 24 +; RV64I-LP64E-WITHFP-NEXT: mv s1, a0 +; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a1 +; RV64I-LP64E-WITHFP-NEXT: call __fixsfdi@plt +; RV64I-LP64E-WITHFP-NEXT: add a0, s1, a0 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 24 +; RV64I-LP64E-WITHFP-NEXT: ret + %b_fptosi = fptosi float %b to i64 + %1 = add i64 %a, %b_fptosi + ret i64 %1 +} + +define i64 @caller_float_in_regs() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_float_in_regs: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: li a0, 1 +; RV64I-LP64E-FPELIM-NEXT: lui a1, 262144 +; RV64I-LP64E-FPELIM-NEXT: call callee_float_in_regs@plt +; RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 
8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_float_in_regs: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: li a0, 1 +; RV64I-LP64E-WITHFP-NEXT: lui a1, 262144 +; RV64I-LP64E-WITHFP-NEXT: call callee_float_in_regs@plt +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) + ret i64 %1 +} + +define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_float_on_stack: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: lw a0, 16(s0) +; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -16 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 16 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_float_on_stack: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: lw a0, 16(s0) +; RV64I-LP64E-WITHFP-NEXT: addi sp, s0, -16 +; 
RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = trunc i128 %d to i64 + %2 = bitcast float %e to i32 + %3 = sext i32 %2 to i64 + %4 = add i64 %1, %3 + ret i64 %3 +} + +define i64 @caller_float_on_stack() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_float_on_stack: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -48 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 48 +; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16 +; RV64I-LP64E-FPELIM-NEXT: lui a0, 264704 +; RV64I-LP64E-FPELIM-NEXT: sd a0, 16(sp) +; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp) +; RV64I-LP64E-FPELIM-NEXT: li a1, 4 +; RV64I-LP64E-FPELIM-NEXT: li a0, 1 +; RV64I-LP64E-FPELIM-NEXT: li a2, 2 +; RV64I-LP64E-FPELIM-NEXT: li a4, 3 +; RV64I-LP64E-FPELIM-NEXT: sd a1, 0(sp) +; RV64I-LP64E-FPELIM-NEXT: li a1, 0 +; RV64I-LP64E-FPELIM-NEXT: li a3, 0 +; RV64I-LP64E-FPELIM-NEXT: li a5, 0 +; RV64I-LP64E-FPELIM-NEXT: call callee_float_on_stack@plt +; RV64I-LP64E-FPELIM-NEXT: addi sp, s0, -48 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 48 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_float_on_stack: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -48 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 48 +; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: lui a0, 264704 +; RV64I-LP64E-WITHFP-NEXT: sd a0, 16(sp) +; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp) +; RV64I-LP64E-WITHFP-NEXT: li a1, 4 +; RV64I-LP64E-WITHFP-NEXT: 
li a0, 1 +; RV64I-LP64E-WITHFP-NEXT: li a2, 2 +; RV64I-LP64E-WITHFP-NEXT: li a4, 3 +; RV64I-LP64E-WITHFP-NEXT: sd a1, 0(sp) +; RV64I-LP64E-WITHFP-NEXT: li a1, 0 +; RV64I-LP64E-WITHFP-NEXT: li a3, 0 +; RV64I-LP64E-WITHFP-NEXT: li a5, 0 +; RV64I-LP64E-WITHFP-NEXT: call callee_float_on_stack@plt +; RV64I-LP64E-WITHFP-NEXT: addi sp, s0, -48 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 48 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) + ret i64 %1 +} + +define float @callee_tiny_scalar_ret() nounwind { +; RV64I-LP64E-FPELIM-LABEL: callee_tiny_scalar_ret: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: lui a0, 260096 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: callee_tiny_scalar_ret: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: lui a0, 260096 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + ret float 1.0 +} + +; The sign extension of the float return is necessary, as softened floats are +; passed anyext. 
+ +define i64 @caller_tiny_scalar_ret() nounwind { +; RV64I-LP64E-FPELIM-LABEL: caller_tiny_scalar_ret: +; RV64I-LP64E-FPELIM: # %bb.0: +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, -8 +; RV64I-LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-FPELIM-NEXT: call callee_tiny_scalar_ret@plt +; RV64I-LP64E-FPELIM-NEXT: sext.w a0, a0 +; RV64I-LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-FPELIM-NEXT: addi sp, sp, 8 +; RV64I-LP64E-FPELIM-NEXT: ret +; +; RV64I-LP64E-WITHFP-LABEL: caller_tiny_scalar_ret: +; RV64I-LP64E-WITHFP: # %bb.0: +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, -16 +; RV64I-LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: call callee_tiny_scalar_ret@plt +; RV64I-LP64E-WITHFP-NEXT: sext.w a0, a0 +; RV64I-LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LP64E-WITHFP-NEXT: addi sp, sp, 16 +; RV64I-LP64E-WITHFP-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + %3 = sext i32 %2 to i64 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IF-ILP32E + +; Exercises the ILP32E calling convention code in the case that f32 is a legal +; type. As well as testing that lowering is correct, these tests also aim to +; check that floating point load/store or integer load/store is chosen +; optimally when floats are passed on the stack. 
+ +define float @onstack_f32_noop(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind { +; RV32IF-ILP32E-LABEL: onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: lw a0, 12(sp) +; RV32IF-ILP32E-NEXT: ret + ret float %f +} + +define float @onstack_f32_fadd(i64 %a, i64 %b, i64 %c, i64 %d, float %e, float %f) nounwind { +; RV32IF-ILP32E-LABEL: onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: flw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: flw fa4, 8(sp) +; RV32IF-ILP32E-NEXT: fadd.s fa5, fa4, fa5 +; RV32IF-ILP32E-NEXT: fmv.x.w a0, fa5 +; RV32IF-ILP32E-NEXT: ret + %1 = fadd float %e, %f + ret float %1 +} + +define float @caller_onstack_f32_noop(float %a) nounwind { +; RV32IF-ILP32E-LABEL: caller_onstack_f32_noop: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -20 +; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: sw a0, 12(sp) +; RV32IF-ILP32E-NEXT: lui a0, 264704 +; RV32IF-ILP32E-NEXT: sw a0, 8(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a1, 4 +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 20 +; RV32IF-ILP32E-NEXT: ret + %1 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float 5.0, float %a) + ret float %1 +} + +define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { +; RV32IF-ILP32E-LABEL: caller_onstack_f32_fadd: +; RV32IF-ILP32E: # %bb.0: +; RV32IF-ILP32E-NEXT: addi sp, sp, -20 +; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; RV32IF-ILP32E-NEXT: fmv.w.x fa5, a1 +; RV32IF-ILP32E-NEXT: fmv.w.x fa4, a0 +; RV32IF-ILP32E-NEXT: fadd.s fa3, fa4, fa5 +; RV32IF-ILP32E-NEXT: fsub.s fa5, fa5, fa4 +; 
RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: li a0, 4 +; RV32IF-ILP32E-NEXT: sw a0, 0(sp) +; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: li a0, 1 +; RV32IF-ILP32E-NEXT: li a2, 2 +; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp) +; RV32IF-ILP32E-NEXT: li a1, 0 +; RV32IF-ILP32E-NEXT: li a3, 0 +; RV32IF-ILP32E-NEXT: li a5, 0 +; RV32IF-ILP32E-NEXT: call onstack_f32_noop@plt +; RV32IF-ILP32E-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IF-ILP32E-NEXT: addi sp, sp, 20 +; RV32IF-ILP32E-NEXT: ret + %1 = fadd float %a, %b + %2 = fsub float %b, %a + %3 = call float @onstack_f32_noop(i64 1, i64 2, i64 3, i64 4, float %1, float %2) + ret float %3 +} diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -6,12 +6,24 @@ ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD ; +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32E +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32E-F +; ; RUN: llc -mtriple riscv64-unknown-elf -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64 ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-F ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD +; +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64E +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix 
CHECK-RV64E-F +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f,+d -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64E-FD ; ; Checking for special return instructions (sret, mret). @@ -289,6 +301,124 @@ ; CHECK-RV32-FD-NEXT: addi sp, sp, 320 ; CHECK-RV32-FD-NEXT: mret ; +; CHECK-RV32E-LABEL: foo_with_call: +; CHECK-RV32E: # %bb.0: +; CHECK-RV32E-NEXT: addi sp, sp, -40 +; CHECK-RV32E-NEXT: sw ra, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t0, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t1, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t2, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: call otherfoo@plt +; CHECK-RV32E-NEXT: lw ra, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t1, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t2, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: addi sp, sp, 40 +; CHECK-RV32E-NEXT: mret +; +; CHECK-RV32E-F-LABEL: foo_with_call: +; CHECK-RV32E-F: # %bb.0: +; CHECK-RV32E-F-NEXT: addi sp, sp, -168 +; CHECK-RV32E-F-NEXT: sw ra, 164(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t0, 160(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t1, 156(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t2, 152(sp) # 4-byte Folded 
Spill +; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa2, 84(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa3, 80(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft8, 60(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft9, 56(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft10, 52(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft11, 48(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs0, 44(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs1, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs2, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs3, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs4, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs5, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs6, 20(sp) 
# 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs7, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs8, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs9, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs10, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs11, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: call otherfoo@plt +; CHECK-RV32E-F-NEXT: lw ra, 164(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t0, 160(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t1, 156(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t2, 152(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: 
flw ft8, 60(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft9, 56(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft10, 52(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft11, 48(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs0, 44(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs1, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs2, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs3, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs4, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs5, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs6, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs7, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs8, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs9, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs10, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs11, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: addi sp, sp, 168 +; CHECK-RV32E-F-NEXT: mret +; ; CHECK-RV64-LABEL: foo_with_call: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: addi sp, sp, -128 @@ -533,6 +663,215 @@ ; CHECK-RV64-FD-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: addi sp, sp, 384 ; CHECK-RV64-FD-NEXT: mret +; +; CHECK-RV64E-LABEL: foo_with_call: +; CHECK-RV64E: # %bb.0: +; CHECK-RV64E-NEXT: addi sp, sp, -80 +; CHECK-RV64E-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t0, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t1, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t2, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: call otherfoo@plt +; 
CHECK-RV64E-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t0, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t1, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t2, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: addi sp, sp, 80 +; CHECK-RV64E-NEXT: mret +; +; CHECK-RV64E-F-LABEL: foo_with_call: +; CHECK-RV64E-F: # %bb.0: +; CHECK-RV64E-F-NEXT: addi sp, sp, -208 +; CHECK-RV64E-F-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t2, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa2, 
84(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa3, 80(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft8, 60(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft9, 56(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft10, 52(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft11, 48(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs0, 44(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs1, 40(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs2, 36(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs3, 32(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs4, 28(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs5, 24(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs6, 20(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs7, 16(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs8, 12(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs9, 8(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs10, 4(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs11, 0(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: call otherfoo@plt +; CHECK-RV64E-F-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft0, 
124(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft8, 60(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft9, 56(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft10, 52(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft11, 48(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs0, 44(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs1, 40(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs2, 36(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs3, 32(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs4, 28(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs5, 24(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs6, 20(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs7, 16(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs8, 12(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs9, 8(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs10, 4(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs11, 0(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: addi sp, sp, 208 
+; CHECK-RV64E-F-NEXT: mret +; +; CHECK-RV64E-FD-LABEL: foo_with_call: +; CHECK-RV64E-FD: # %bb.0: +; CHECK-RV64E-FD-NEXT: addi sp, sp, -336 +; CHECK-RV64E-FD-NEXT: sd ra, 328(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t0, 320(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t1, 312(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t2, 304(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a5, 256(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa2, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa3, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa4, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa5, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa6, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa7, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft8, 120(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft9, 112(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft10, 104(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft11, 96(sp) # 8-byte 
Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: call otherfoo@plt +; CHECK-RV64E-FD-NEXT: ld ra, 328(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t0, 320(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t1, 312(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t2, 304(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a3, 272(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a4, 264(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a5, 256(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa0, 184(sp) # 8-byte Folded Reload +; 
CHECK-RV64E-FD-NEXT: fld fa1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa2, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa3, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa4, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa5, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa6, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa7, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft8, 120(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft9, 112(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft10, 104(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft11, 96(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: addi sp, sp, 336 +; CHECK-RV64E-FD-NEXT: mret %call = call i32 @otherfoo() ret void } @@ -796,6 +1135,130 @@ ; CHECK-RV32-FD-NEXT: addi sp, sp, 336 ; CHECK-RV32-FD-NEXT: mret ; +; CHECK-RV32E-LABEL: foo_fp_with_call: +; CHECK-RV32E: # %bb.0: +; CHECK-RV32E-NEXT: addi sp, sp, -44 +; CHECK-RV32E-NEXT: sw ra, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t0, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t1, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw t2, 28(sp) # 4-byte Folded Spill +; 
CHECK-RV32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: sw a5, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-NEXT: addi s0, sp, 44 +; CHECK-RV32E-NEXT: call otherfoo@plt +; CHECK-RV32E-NEXT: lw ra, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t1, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw t2, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a2, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: lw a5, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-NEXT: addi sp, sp, 44 +; CHECK-RV32E-NEXT: mret +; +; CHECK-RV32E-F-LABEL: foo_fp_with_call: +; CHECK-RV32E-F: # %bb.0: +; CHECK-RV32E-F-NEXT: addi sp, sp, -172 +; CHECK-RV32E-F-NEXT: sw ra, 168(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t0, 164(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t1, 160(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw t2, 156(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw s0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a0, 148(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a2, 140(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a3, 136(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a4, 132(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded 
Spill +; CHECK-RV32E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa2, 84(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa3, 80(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft8, 60(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft9, 56(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft10, 52(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw ft11, 48(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs0, 44(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs1, 40(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs2, 36(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs3, 32(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs4, 28(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs5, 24(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs6, 20(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs7, 16(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs8, 12(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs9, 8(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs10, 4(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: fsw fs11, 0(sp) # 4-byte Folded Spill +; CHECK-RV32E-F-NEXT: addi s0, sp, 172 +; CHECK-RV32E-F-NEXT: call otherfoo@plt +; 
CHECK-RV32E-F-NEXT: lw ra, 168(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t0, 164(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t1, 160(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw t2, 156(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw s0, 152(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a0, 148(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a2, 140(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a3, 136(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a4, 132(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft8, 60(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft9, 56(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft10, 52(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw ft11, 48(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs0, 44(sp) # 4-byte Folded Reload +; 
CHECK-RV32E-F-NEXT: flw fs1, 40(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs2, 36(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs3, 32(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs4, 28(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs5, 24(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs6, 20(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs7, 16(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs8, 12(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs9, 8(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs10, 4(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: flw fs11, 0(sp) # 4-byte Folded Reload +; CHECK-RV32E-F-NEXT: addi sp, sp, 172 +; CHECK-RV32E-F-NEXT: mret +; ; CHECK-RV64-LABEL: foo_fp_with_call: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: addi sp, sp, -144 @@ -1049,6 +1512,224 @@ ; CHECK-RV64-FD-NEXT: fld fs11, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-FD-NEXT: addi sp, sp, 400 ; CHECK-RV64-FD-NEXT: mret +; +; CHECK-RV64E-LABEL: foo_fp_with_call: +; CHECK-RV64E: # %bb.0: +; CHECK-RV64E-NEXT: addi sp, sp, -88 +; CHECK-RV64E-NEXT: sd ra, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t0, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t1, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd t2, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a3, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: sd a5, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-NEXT: addi s0, sp, 88 +; CHECK-RV64E-NEXT: call otherfoo@plt +; CHECK-RV64E-NEXT: ld ra, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t0, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld t1, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: 
ld t2, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a3, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: ld a5, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-NEXT: addi sp, sp, 88 +; CHECK-RV64E-NEXT: mret +; +; CHECK-RV64E-F-LABEL: foo_fp_with_call: +; CHECK-RV64E-F: # %bb.0: +; CHECK-RV64E-F-NEXT: addi sp, sp, -216 +; CHECK-RV64E-F-NEXT: sd ra, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t0, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t1, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: sd a5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa2, 84(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa3, 80(sp) # 
4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft8, 60(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft9, 56(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft10, 52(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw ft11, 48(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs0, 44(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs1, 40(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs2, 36(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs3, 32(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs4, 28(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs5, 24(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs6, 20(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs7, 16(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs8, 12(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs9, 8(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs10, 4(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: fsw fs11, 0(sp) # 4-byte Folded Spill +; CHECK-RV64E-F-NEXT: addi s0, sp, 216 +; CHECK-RV64E-F-NEXT: call otherfoo@plt +; CHECK-RV64E-F-NEXT: ld ra, 208(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t0, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t1, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-F-NEXT: ld a5, 128(sp) # 8-byte Folded Reload +; 
CHECK-RV64E-F-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft8, 60(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft9, 56(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft10, 52(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw ft11, 48(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs0, 44(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs1, 40(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs2, 36(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs3, 32(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs4, 28(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs5, 24(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs6, 20(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs7, 16(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs8, 12(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs9, 8(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs10, 4(sp) # 4-byte Folded Reload +; CHECK-RV64E-F-NEXT: flw fs11, 0(sp) # 4-byte Folded Reload +; 
CHECK-RV64E-F-NEXT: addi sp, sp, 216 +; CHECK-RV64E-F-NEXT: mret +; +; CHECK-RV64E-FD-LABEL: foo_fp_with_call: +; CHECK-RV64E-FD: # %bb.0: +; CHECK-RV64E-FD-NEXT: addi sp, sp, -344 +; CHECK-RV64E-FD-NEXT: sd ra, 336(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t0, 328(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t1, 320(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd t2, 312(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd s0, 304(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: sd a5, 256(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa2, 168(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa3, 160(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa4, 152(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa5, 144(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa6, 136(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fa7, 128(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft8, 120(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft9, 112(sp) # 8-byte Folded Spill +; 
CHECK-RV64E-FD-NEXT: fsd ft10, 104(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd ft11, 96(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: fsd fs11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64E-FD-NEXT: addi s0, sp, 344 +; CHECK-RV64E-FD-NEXT: call otherfoo@plt +; CHECK-RV64E-FD-NEXT: ld ra, 336(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t0, 328(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t1, 320(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld t2, 312(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld s0, 304(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a3, 272(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a4, 264(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: ld a5, 256(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft5, 208(sp) # 
8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa2, 168(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa3, 160(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa4, 152(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa5, 144(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa6, 136(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fa7, 128(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft8, 120(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft9, 112(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft10, 104(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld ft11, 96(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: fld fs11, 0(sp) # 8-byte Folded Reload +; CHECK-RV64E-FD-NEXT: addi sp, sp, 344 +; CHECK-RV64E-FD-NEXT: mret %call = call i32 @otherfoo() ret void } diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32e.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; TODO: Add more tests. + +define i32 @exhausted(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { +; CHECK-LABEL: exhausted: +; CHECK: # %bb.0: +; CHECK-NEXT: lw t0, 0(sp) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, t0, a0 +; CHECK-NEXT: ret + %1 = add i32 %a, %b + %2 = add i32 %c, %1 + %3 = add i32 %d, %2 + %4 = add i32 %e, %3 + %5 = add i32 %f, %4 + %6 = add i32 %g, %5 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/RISCV/rv64e.ll b/llvm/test/CodeGen/RISCV/rv64e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64e.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; TODO: Add more tests. 
+ +define i64 @exhausted(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) { +; CHECK-LABEL: exhausted: +; CHECK: # %bb.0: +; CHECK-NEXT: ld t0, 0(sp) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, t0, a0 +; CHECK-NEXT: ret + %1 = add i64 %a, %b + %2 = add i64 %c, %1 + %3 = add i64 %d, %2 + %4 = add i64 %e, %3 + %5 = add i64 %f, %4 + %6 = add i64 %g, %5 + ret i64 %6 +} diff --git a/llvm/test/CodeGen/RISCV/rve.ll b/llvm/test/CodeGen/RISCV/rve.ll deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rve.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 | FileCheck %s - -; CHECK: LLVM ERROR: Codegen not yet implemented for RVE - -define void @nothing() nounwind { - ret void -} diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-LP64E declare void @callee(ptr, ptr) @@ -34,6 +38,33 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; 
RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv s1, sp +; RV32I-ILP32E-NEXT: addi a0, a0, 3 +; RV32I-ILP32E-NEXT: andi a0, a0, -4 +; RV32I-ILP32E-NEXT: sub a0, sp, a0 +; RV32I-ILP32E-NEXT: mv sp, a0 +; RV32I-ILP32E-NEXT: mv a1, s1 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -62,6 +93,35 @@ ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -64 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64 +; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: .cfi_offset s1, -24 +; RV64I-LP64E-NEXT: addi s0, sp, 64 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -64 +; RV64I-LP64E-NEXT: mv s1, sp +; RV64I-LP64E-NEXT: slli a0, a0, 32 +; RV64I-LP64E-NEXT: srli a0, a0, 32 +; RV64I-LP64E-NEXT: addi a0, a0, 7 +; RV64I-LP64E-NEXT: andi a0, a0, -8 +; RV64I-LP64E-NEXT: sub a0, sp, a0 +; RV64I-LP64E-NEXT: mv sp, a0 +; RV64I-LP64E-NEXT: mv a1, 
s1 +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -64 +; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 64 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, i32 %n %2 = alloca i32, align 64 call void @callee(ptr %1, ptr %2) diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -1,11 +1,135 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I-ILP32E ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I-LP64E declare void @callee(ptr) +define void @caller16() { +; RV32I-LABEL: caller16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller16: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 16 +; RV32I-ILP32E-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 16 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; 
RV32I-ILP32E-NEXT: andi sp, sp, -16 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -16 +; RV32I-ILP32E-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 16 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller16: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -32 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32 +; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 32 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -16 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -32 +; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 32 +; RV64I-LP64E-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign16() "no-realign-stack" { +; RV32I-LABEL: caller_no_realign16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-ILP32E-LABEL: caller_no_realign16: +; RV32I-ILP32E: # %bb.0: +; 
RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; +; RV64I-LABEL: caller_no_realign16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign16: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret + %1 = alloca i8, align 16 + call void @callee(i8* %1) + ret void +} + define void @caller32() { ; RV32I-LABEL: caller32: ; RV32I: # %bb.0: @@ -26,6 +150,25 @@ ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 32 +; RV32I-ILP32E-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 32 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -32 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -32 +; RV32I-ILP32E-NEXT: lw ra, 28(sp) # 4-byte 
Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 32 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 @@ -44,6 +187,25 @@ ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller32: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -32 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 32 +; RV64I-LP64E-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 32 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -32 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -32 +; RV64I-LP64E-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 32 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 32 call void @callee(ptr %1) ret void @@ -62,6 +224,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign32: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign32: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -73,6 +247,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign32: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 
+; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 32 call void @callee(ptr %1) ret void @@ -98,6 +284,25 @@ ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 64 +; RV32I-ILP32E-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 64 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -64 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -64 +; RV32I-ILP32E-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 64 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 @@ -116,6 +321,25 @@ ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller64: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -64 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 64 +; RV64I-LP64E-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 64 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -64 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -64 +; RV64I-LP64E-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld 
s0, 48(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 64 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 64 call void @callee(ptr %1) ret void @@ -134,6 +358,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign64: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -145,6 +381,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign64: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 64 call void @callee(ptr %1) ret void @@ -170,6 +418,25 @@ ; RV32I-NEXT: addi sp, sp, 128 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 128 +; RV32I-ILP32E-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 128 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -128 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; 
RV32I-ILP32E-NEXT: addi sp, s0, -128 +; RV32I-ILP32E-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 128 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -128 @@ -188,6 +455,25 @@ ; RV64I-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 128 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller128: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -128 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 128 +; RV64I-LP64E-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 128 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -128 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -128 +; RV64I-LP64E-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 128 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 128 call void @callee(ptr %1) ret void @@ -206,6 +492,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign128: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign128: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -217,6 +515,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign128: 
+; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 128 call void @callee(ptr %1) ret void @@ -242,6 +552,25 @@ ; RV32I-NEXT: addi sp, sp, 256 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 256 +; RV32I-ILP32E-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 256 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -256 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -256 +; RV32I-ILP32E-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 256 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -256 @@ -260,6 +589,25 @@ ; RV64I-NEXT: ld s0, 240(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 256 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller256: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -256 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 256 +; RV64I-LP64E-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 256 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -256 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: 
call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -256 +; RV64I-LP64E-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 256 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 256 call void @callee(ptr %1) ret void @@ -278,6 +626,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign256: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign256: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -289,6 +649,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign256: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 256 call void @callee(ptr %1) ret void @@ -314,6 +686,25 @@ ; RV32I-NEXT: addi sp, sp, 1024 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -1024 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 1024 +; RV32I-ILP32E-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 1024 +; 
RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: andi sp, sp, -512 +; RV32I-ILP32E-NEXT: addi a0, sp, 512 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -1024 +; RV32I-ILP32E-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 1024 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller512: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -1024 @@ -332,6 +723,25 @@ ; RV64I-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 1024 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller512: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -1024 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 1024 +; RV64I-LP64E-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 1024 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: andi sp, sp, -512 +; RV64I-LP64E-NEXT: addi a0, sp, 512 +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -1024 +; RV64I-LP64E-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 1024 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 512 call void @callee(ptr %1) ret void @@ -350,6 +760,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign512: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign512: ; RV64I: # %bb.0: ; RV64I-NEXT: 
addi sp, sp, -16 @@ -361,6 +783,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign512: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 512 call void @callee(ptr %1) ret void @@ -388,6 +822,27 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: andi sp, sp, -1024 +; RV32I-ILP32E-NEXT: addi a0, sp, 1024 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: addi sp, s0, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, 4 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -408,6 +863,27 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller1024: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd 
s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: addi sp, sp, -8 +; RV64I-LP64E-NEXT: andi sp, sp, -1024 +; RV64I-LP64E-NEXT: addi a0, sp, 1024 +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: addi sp, s0, -2048 +; RV64I-LP64E-NEXT: addi sp, sp, 8 +; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 1024 call void @callee(ptr %1) ret void @@ -426,6 +902,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign1024: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign1024: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -437,6 +925,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign1024: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 1024 call void @callee(ptr %1) ret void @@ -468,6 +968,31 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: 
caller2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: addi sp, sp, -2048 +; RV32I-ILP32E-NEXT: addi sp, sp, -4 +; RV32I-ILP32E-NEXT: andi sp, sp, -2048 +; RV32I-ILP32E-NEXT: addi a0, sp, 2047 +; RV32I-ILP32E-NEXT: addi a0, a0, 1 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -492,6 +1017,31 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller2048: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: addi sp, sp, -2048 +; RV64I-LP64E-NEXT: addi sp, sp, -8 +; RV64I-LP64E-NEXT: andi sp, sp, -2048 +; RV64I-LP64E-NEXT: addi a0, sp, 2047 +; RV64I-LP64E-NEXT: addi a0, a0, 1 +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: lui a0, 1 +; RV64I-LP64E-NEXT: sub sp, s0, a0 +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; 
RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 2048 call void @callee(ptr %1) ret void @@ -510,6 +1060,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller_no_realign2048: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign2048: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -521,6 +1083,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign2048: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 2048 call void @callee(ptr %1) ret void @@ -554,6 +1128,33 @@ ; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; +; RV32I-ILP32E-LABEL: caller4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 2044 +; RV32I-ILP32E-NEXT: sw ra, 2040(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: sw s0, 2036(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 +; RV32I-ILP32E-NEXT: addi s0, sp, 2044 +; RV32I-ILP32E-NEXT: .cfi_def_cfa s0, 0 +; RV32I-ILP32E-NEXT: lui 
a0, 2 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: sub sp, sp, a0 +; RV32I-ILP32E-NEXT: srli a0, sp, 12 +; RV32I-ILP32E-NEXT: slli sp, a0, 12 +; RV32I-ILP32E-NEXT: lui a0, 1 +; RV32I-ILP32E-NEXT: add a0, sp, a0 +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lui a0, 2 +; RV32I-ILP32E-NEXT: sub sp, s0, a0 +; RV32I-ILP32E-NEXT: addi a0, a0, -2044 +; RV32I-ILP32E-NEXT: add sp, sp, a0 +; RV32I-ILP32E-NEXT: lw ra, 2040(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: lw s0, 2036(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 2044 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -2032 @@ -580,6 +1181,33 @@ ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller4096: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 2040 +; RV64I-LP64E-NEXT: sd ra, 2032(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: sd s0, 2024(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: .cfi_offset s0, -16 +; RV64I-LP64E-NEXT: addi s0, sp, 2040 +; RV64I-LP64E-NEXT: .cfi_def_cfa s0, 0 +; RV64I-LP64E-NEXT: lui a0, 2 +; RV64I-LP64E-NEXT: addiw a0, a0, -2040 +; RV64I-LP64E-NEXT: sub sp, sp, a0 +; RV64I-LP64E-NEXT: srli a0, sp, 12 +; RV64I-LP64E-NEXT: slli sp, a0, 12 +; RV64I-LP64E-NEXT: lui a0, 1 +; RV64I-LP64E-NEXT: add a0, sp, a0 +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: lui a0, 2 +; RV64I-LP64E-NEXT: sub sp, s0, a0 +; RV64I-LP64E-NEXT: addiw a0, a0, -2040 +; RV64I-LP64E-NEXT: add sp, sp, a0 +; RV64I-LP64E-NEXT: ld ra, 2032(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: ld s0, 2024(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 2040 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 4096 call void @callee(ptr %1) ret void @@ -598,6 +1226,18 @@ ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; 
RV32I-ILP32E-LABEL: caller_no_realign4096: +; RV32I-ILP32E: # %bb.0: +; RV32I-ILP32E-NEXT: addi sp, sp, -8 +; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 8 +; RV32I-ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 +; RV32I-ILP32E-NEXT: mv a0, sp +; RV32I-ILP32E-NEXT: call callee@plt +; RV32I-ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-ILP32E-NEXT: addi sp, sp, 8 +; RV32I-ILP32E-NEXT: ret +; ; RV64I-LABEL: caller_no_realign4096: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -609,6 +1249,18 @@ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-LP64E-LABEL: caller_no_realign4096: +; RV64I-LP64E: # %bb.0: +; RV64I-LP64E-NEXT: addi sp, sp, -16 +; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 16 +; RV64I-LP64E-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LP64E-NEXT: .cfi_offset ra, -8 +; RV64I-LP64E-NEXT: mv a0, sp +; RV64I-LP64E-NEXT: call callee@plt +; RV64I-LP64E-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LP64E-NEXT: addi sp, sp, 16 +; RV64I-LP64E-NEXT: ret %1 = alloca i8, align 4096 call void @callee(ptr %1) ret void diff --git a/llvm/test/CodeGen/RISCV/target-abi-valid.ll b/llvm/test/CodeGen/RISCV/target-abi-valid.ll --- a/llvm/test/CodeGen/RISCV/target-abi-valid.ll +++ b/llvm/test/CodeGen/RISCV/target-abi-valid.ll @@ -2,6 +2,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32 < %s \ @@ -10,6 +12,8 @@ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -target-abi lp64 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s +; RUN: llc -mtriple=riscv64 -target-abi 
lp64e < %s \ +; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64 < %s \ ; RUN: | FileCheck -check-prefix=CHECK-IMP %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64 < %s \ @@ -33,8 +37,3 @@ ; CHECK-IMP-NEXT: ret ret void } - -; RUN: not --crash llc -mtriple=riscv32 -target-abi ilp32e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=CHECK-UNIMP %s - -; CHECK-UNIMP: LLVM ERROR: Don't know how to lower this ABI diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @abort() + +define i32 @caller(i32 %a) { +; ILP32E-LABEL: caller: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -8 +; ILP32E-NEXT: .cfi_def_cfa_offset 8 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -4 +; ILP32E-NEXT: .cfi_offset s0, -8 +; ILP32E-NEXT: mv s0, a0 +; ILP32E-NEXT: li a0, 1 +; ILP32E-NEXT: lui a2, 262144 +; ILP32E-NEXT: li a1, 0 +; ILP32E-NEXT: call va_double@plt +; ILP32E-NEXT: mv a0, s0 +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 8 +; ILP32E-NEXT: ret +; +; ILP32E-WITHFP-LABEL: caller: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 12 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 
4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -4 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 +; ILP32E-WITHFP-NEXT: .cfi_offset s1, -12 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; ILP32E-WITHFP-NEXT: mv s1, a0 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: lui a2, 262144 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va_double@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 12 +; ILP32E-WITHFP-NEXT: ret +entry: + call void (i32, ...) @va_double(i32 1, double 2.000000e+00) + ret i32 %a +} + +define void @va_double(i32 %n, ...) { +; ILP32E-LABEL: va_double: +; ILP32E: # %bb.0: # %entry +; ILP32E-NEXT: addi sp, sp, -32 +; ILP32E-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-NEXT: .cfi_offset ra, -28 +; ILP32E-NEXT: sw a5, 28(sp) +; ILP32E-NEXT: sw a4, 24(sp) +; ILP32E-NEXT: sw a3, 20(sp) +; ILP32E-NEXT: sw a2, 16(sp) +; ILP32E-NEXT: sw a1, 12(sp) +; ILP32E-NEXT: addi a0, sp, 19 +; ILP32E-NEXT: andi a1, a0, -8 +; ILP32E-NEXT: addi a0, a1, 8 +; ILP32E-NEXT: sw a0, 0(sp) +; ILP32E-NEXT: lw a0, 0(a1) +; ILP32E-NEXT: lw a1, 4(a1) +; ILP32E-NEXT: lui a3, 262144 +; ILP32E-NEXT: li a2, 0 +; ILP32E-NEXT: call __eqdf2@plt +; ILP32E-NEXT: bnez a0, .LBB1_2 +; ILP32E-NEXT: # %bb.1: # %if.end +; ILP32E-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-NEXT: addi sp, sp, 32 +; ILP32E-NEXT: ret +; ILP32E-NEXT: .LBB1_2: # %if.then +; ILP32E-NEXT: call abort@plt +; +; ILP32E-WITHFP-LABEL: va_double: +; ILP32E-WITHFP: # %bb.0: # %entry +; ILP32E-WITHFP-NEXT: addi sp, sp, -36 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 36 +; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 
4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 12 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 8 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: lw a1, 4(a1) +; ILP32E-WITHFP-NEXT: lui a3, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: call __eqdf2@plt +; ILP32E-WITHFP-NEXT: bnez a0, .LBB1_2 +; ILP32E-WITHFP-NEXT: # %bb.1: # %if.end +; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 36 +; ILP32E-WITHFP-NEXT: ret +; ILP32E-WITHFP-NEXT: .LBB1_2: # %if.then +; ILP32E-WITHFP-NEXT: call abort@plt +entry: + %args = alloca i8*, align 4 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %argp.cur = load i8*, i8** %args, align 4 + %0 = ptrtoint i8* %argp.cur to i32 + %1 = add i32 %0, 7 + %2 = and i32 %1, -8 + %argp.cur.aligned = inttoptr i32 %2 to i8* + %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 8 + store i8* %argp.next, i8** %args, align 4 + %3 = bitcast i8* %argp.cur.aligned to double* + %4 = load double, double* %3, align 8 + %cmp = fcmp une double %4, 2.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + call void @abort() + unreachable + +if.end: + %args2 = bitcast i8** %args to i8* + call void @llvm.va_end(i8* %args2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -11,6 +11,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d 
-target-abi ilp32d \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32D-ILP32-ILP32F-ILP32D-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-FPELIM %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=ILP32E-WITHFP %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \ @@ -21,6 +25,10 @@ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck -check-prefix=LP64-LP64F-LP64D-WITHFP %s +; RUN: llc -mtriple=riscv64 -target-abi lp64e -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=LP64E-FPELIM %s +; RUN: llc -mtriple=riscv64 -target-abi lp64e -frame-pointer=all -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=LP64E-WITHFP %s ; The same vararg calling convention is used for ilp32/ilp32f/ilp32d and for ; lp64/lp64f/lp64d. 
Different CHECK lines are required for RV32D due to slight @@ -97,6 +105,44 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 32 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 16 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -138,6 +184,44 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; 
LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 28 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lw a0, 24(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load ptr, ptr %va, align 4 @@ -202,6 +286,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 16 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: 
va1_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -238,6 +355,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; 
LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -338,6 +488,62 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_va_arg_alloca: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: addi s0, sp, 16 +; ILP32E-FPELIM-NEXT: mv s1, a1 +; ILP32E-FPELIM-NEXT: sw a5, 20(s0) +; ILP32E-FPELIM-NEXT: sw a4, 16(s0) +; ILP32E-FPELIM-NEXT: sw a3, 12(s0) +; ILP32E-FPELIM-NEXT: sw a2, 8(s0) +; ILP32E-FPELIM-NEXT: sw a1, 4(s0) +; ILP32E-FPELIM-NEXT: addi a0, s0, 8 +; ILP32E-FPELIM-NEXT: sw a0, -16(s0) +; ILP32E-FPELIM-NEXT: addi a0, a1, 3 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: sub a0, sp, a0 +; ILP32E-FPELIM-NEXT: mv sp, a0 +; ILP32E-FPELIM-NEXT: call notdead@plt +; ILP32E-FPELIM-NEXT: mv a0, s1 +; ILP32E-FPELIM-NEXT: addi sp, s0, -16 +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 40 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_va_arg_alloca: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw 
a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a1, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: sub a0, sp, a0 +; ILP32E-WITHFP-NEXT: mv sp, a0 +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: mv a0, s1 +; ILP32E-WITHFP-NEXT: addi sp, s0, -16 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 40 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -401,6 +607,66 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg_alloca: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: addi s0, sp, 32 +; LP64E-FPELIM-NEXT: mv s1, a1 +; LP64E-FPELIM-NEXT: sd a5, 40(s0) +; LP64E-FPELIM-NEXT: sd a4, 32(s0) +; LP64E-FPELIM-NEXT: sd a3, 24(s0) +; LP64E-FPELIM-NEXT: sd a2, 16(s0) +; LP64E-FPELIM-NEXT: sd a1, 8(s0) +; LP64E-FPELIM-NEXT: addi a0, s0, 16 +; LP64E-FPELIM-NEXT: sd a0, -32(s0) +; LP64E-FPELIM-NEXT: slli a0, a1, 32 +; LP64E-FPELIM-NEXT: srli a0, a0, 32 +; LP64E-FPELIM-NEXT: addi a0, a0, 7 +; LP64E-FPELIM-NEXT: andi a0, a0, -8 +; LP64E-FPELIM-NEXT: sub a0, sp, a0 +; LP64E-FPELIM-NEXT: mv sp, a0 +; LP64E-FPELIM-NEXT: call notdead@plt +; LP64E-FPELIM-NEXT: mv a0, s1 +; LP64E-FPELIM-NEXT: addi sp, s0, -32 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld 
s0, 16(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg_alloca: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd a0, -32(s0) +; LP64E-WITHFP-NEXT: slli a0, a1, 32 +; LP64E-WITHFP-NEXT: srli a0, a0, 32 +; LP64E-WITHFP-NEXT: addi a0, a0, 7 +; LP64E-WITHFP-NEXT: andi a0, a0, -8 +; LP64E-WITHFP-NEXT: sub a0, sp, a0 +; LP64E-WITHFP-NEXT: mv sp, a0 +; LP64E-WITHFP-NEXT: call notdead@plt +; LP64E-WITHFP-NEXT: mv a0, s1 +; LP64E-WITHFP-NEXT: addi sp, s0, -32 +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -451,6 +717,33 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va1_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: lui a2, 261888 +; ILP32E-FPELIM-NEXT: li a3, 2 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call va1@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va1_caller: +; 
ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a2, 261888 +; ILP32E-WITHFP-NEXT: li a3, 2 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va1@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -477,6 +770,33 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: li a2, 2 +; LP64E-FPELIM-NEXT: call va1@plt +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: li a2, 2 +; LP64E-WITHFP-NEXT: call va1@plt +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i32 (ptr, ...) 
@va1(ptr undef, double 1.0, i32 2) ret void } @@ -547,6 +867,45 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 19 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a1, sp, 27 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(a0) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a1, s0, 19 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(a0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -599,6 +958,55 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; 
LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 24 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lw a0, 8(sp) +; LP64E-FPELIM-NEXT: addiw a0, a0, 7 +; LP64E-FPELIM-NEXT: slli a1, a0, 32 +; LP64E-FPELIM-NEXT: srli a1, a1, 32 +; LP64E-FPELIM-NEXT: addi a1, a1, 8 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: srliw a0, a0, 3 +; LP64E-FPELIM-NEXT: slli a0, a0, 3 +; LP64E-FPELIM-NEXT: ld a0, 0(a0) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 8 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: lw a0, -24(s0) +; LP64E-WITHFP-NEXT: addiw a0, a0, 7 +; LP64E-WITHFP-NEXT: slli a1, a0, 32 +; LP64E-WITHFP-NEXT: srli a1, a1, 32 +; LP64E-WITHFP-NEXT: addi a1, a1, 8 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: srliw a0, a0, 3 +; LP64E-WITHFP-NEXT: slli a0, a0, 3 +; LP64E-WITHFP-NEXT: ld a0, 0(a0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load i32, ptr %va, align 4 @@ -683,6 +1091,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; 
ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 19 +; ILP32E-FPELIM-NEXT: andi a1, a0, -8 +; ILP32E-FPELIM-NEXT: addi a0, a1, 4 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: lw a0, 0(a1) +; ILP32E-FPELIM-NEXT: addi a2, a1, 8 +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 4(a1) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, a1, 4 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: addi a2, a1, 8 +; ILP32E-WITHFP-NEXT: sw a2, -12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 4(a1) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -719,6 +1170,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; 
LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, double @@ -764,6 +1248,31 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va2_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: lui a2, 261888 +; ILP32E-FPELIM-NEXT: li a1, 0 +; ILP32E-FPELIM-NEXT: call va2@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va2_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: lui a2, 261888 +; ILP32E-WITHFP-NEXT: li a1, 0 +; ILP32E-WITHFP-NEXT: call va2@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; 
ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -788,6 +1297,31 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: call va2@plt +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: call va2@plt +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i64 (ptr, ...) 
@va2(ptr undef, double 1.000000e+00) ret void } @@ -864,6 +1398,49 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 27 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a3, sp, 35 +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: add a2, a2, a3 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32E-FPELIM-NEXT: add a1, a2, a1 +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, s0, 19 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -916,6 +1493,55 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; 
LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 16 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lw a0, 8(sp) +; LP64E-FPELIM-NEXT: addiw a0, a0, 7 +; LP64E-FPELIM-NEXT: slli a2, a0, 32 +; LP64E-FPELIM-NEXT: srli a2, a2, 32 +; LP64E-FPELIM-NEXT: addi a2, a2, 8 +; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: srliw a0, a0, 3 +; LP64E-FPELIM-NEXT: slli a0, a0, 3 +; LP64E-FPELIM-NEXT: ld a0, 0(a0) +; LP64E-FPELIM-NEXT: add a0, a1, a0 +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: mv a0, s0 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: lw a0, -24(s0) +; LP64E-WITHFP-NEXT: addiw a0, a0, 7 +; LP64E-WITHFP-NEXT: slli a2, a0, 32 +; LP64E-WITHFP-NEXT: srli a2, a2, 32 +; LP64E-WITHFP-NEXT: addi a2, a2, 8 +; LP64E-WITHFP-NEXT: sd a2, -24(s0) +; LP64E-WITHFP-NEXT: srliw a0, a0, 3 +; LP64E-WITHFP-NEXT: slli a0, a0, 3 +; LP64E-WITHFP-NEXT: ld a0, 0(a0) +; LP64E-WITHFP-NEXT: add a0, a1, a0 +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %argp.cur = load i32, ptr %va, align 4 @@ -1007,6 +1633,53 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: 
addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3_va_arg: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 27 +; ILP32E-FPELIM-NEXT: andi a0, a0, -8 +; ILP32E-FPELIM-NEXT: addi a3, a0, 4 +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) +; ILP32E-FPELIM-NEXT: lw a3, 0(a0) +; ILP32E-FPELIM-NEXT: addi a4, a0, 8 +; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: add a0, a1, a3 +; ILP32E-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32E-FPELIM-NEXT: add a2, a2, a4 +; ILP32E-FPELIM-NEXT: add a1, a2, a1 +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_va_arg: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -32 +; ILP32E-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 16 +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 11 +; ILP32E-WITHFP-NEXT: andi a0, a0, -8 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -12(s0) +; ILP32E-WITHFP-NEXT: lw a3, 0(a0) +; ILP32E-WITHFP-NEXT: addi a4, a0, 8 +; ILP32E-WITHFP-NEXT: sw a4, -12(s0) +; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32E-WITHFP-NEXT: add a2, a2, a4 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 +; ILP32E-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 @@ -1041,6 +1714,37 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld 
s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a3, sp, 24 +; LP64E-FPELIM-NEXT: add a0, a1, a2 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: addi a3, s0, 8 +; LP64E-WITHFP-NEXT: add a0, a1, a2 +; LP64E-WITHFP-NEXT: sd a3, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, double @@ -1096,6 +1800,37 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va3_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -4 +; ILP32E-FPELIM-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: li a0, 2 +; ILP32E-FPELIM-NEXT: li a1, 1111 +; ILP32E-FPELIM-NEXT: lui a4, 262144 +; ILP32E-FPELIM-NEXT: li a2, 0 +; ILP32E-FPELIM-NEXT: li a3, 0 +; ILP32E-FPELIM-NEXT: call va3@plt +; ILP32E-FPELIM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 4 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va3_caller: +; ILP32E-WITHFP: # %bb.0: +; 
ILP32E-WITHFP-NEXT: addi sp, sp, -8 +; ILP32E-WITHFP-NEXT: sw ra, 4(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 0(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 8 +; ILP32E-WITHFP-NEXT: li a0, 2 +; ILP32E-WITHFP-NEXT: li a1, 1111 +; ILP32E-WITHFP-NEXT: lui a4, 262144 +; ILP32E-WITHFP-NEXT: li a2, 0 +; ILP32E-WITHFP-NEXT: li a3, 0 +; ILP32E-WITHFP-NEXT: call va3@plt +; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 8 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 @@ -1124,6 +1859,35 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a2, 1 +; LP64E-FPELIM-NEXT: slli a2, a2, 62 +; LP64E-FPELIM-NEXT: li a0, 2 +; LP64E-FPELIM-NEXT: li a1, 1111 +; LP64E-FPELIM-NEXT: call va3@plt +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a2, 1 +; LP64E-WITHFP-NEXT: slli a2, a2, 62 +; LP64E-WITHFP-NEXT: li a0, 2 +; LP64E-WITHFP-NEXT: li a1, 1111 +; LP64E-WITHFP-NEXT: call va3@plt +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret %1 = call i64 (i32, i64, ...) 
@va3(i32 2, i64 1111, double 2.000000e+00) ret void } @@ -1257,6 +2021,87 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va4_va_copy: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: mv s0, a1 +; ILP32E-FPELIM-NEXT: sw a5, 36(sp) +; ILP32E-FPELIM-NEXT: sw a4, 32(sp) +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: call notdead@plt +; ILP32E-FPELIM-NEXT: lw a0, 4(sp) +; ILP32E-FPELIM-NEXT: addi a0, a0, 3 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a1, a0, 4 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 0(a0) +; ILP32E-FPELIM-NEXT: addi a0, a0, 7 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a2, a0, 4 +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-NEXT: addi a0, a0, 7 +; ILP32E-FPELIM-NEXT: andi a0, a0, -4 +; ILP32E-FPELIM-NEXT: addi a3, a0, 4 +; ILP32E-FPELIM-NEXT: sw a3, 4(sp) +; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: add a1, a1, s0 +; ILP32E-FPELIM-NEXT: add a1, a1, a2 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 +; ILP32E-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 40 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va4_va_copy: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -44 +; ILP32E-WITHFP-NEXT: sw ra, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 20 +; 
ILP32E-WITHFP-NEXT: mv s1, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a0, s0, 8 +; ILP32E-WITHFP-NEXT: sw a0, -16(s0) +; ILP32E-WITHFP-NEXT: sw a0, -20(s0) +; ILP32E-WITHFP-NEXT: call notdead@plt +; ILP32E-WITHFP-NEXT: lw a0, -16(s0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 3 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a1, a0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -16(s0) +; ILP32E-WITHFP-NEXT: lw a1, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a2, a0, 4 +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: addi a0, a0, 7 +; ILP32E-WITHFP-NEXT: andi a0, a0, -4 +; ILP32E-WITHFP-NEXT: addi a3, a0, 4 +; ILP32E-WITHFP-NEXT: sw a3, -16(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: add a1, a1, s1 +; ILP32E-WITHFP-NEXT: add a1, a1, a2 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 +; ILP32E-WITHFP-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 44 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 @@ -1341,6 +2186,87 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 112 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va4_va_copy: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: mv s0, a1 +; LP64E-FPELIM-NEXT: sd a5, 72(sp) +; LP64E-FPELIM-NEXT: sd a4, 64(sp) +; LP64E-FPELIM-NEXT: sd a3, 
56(sp) +; LP64E-FPELIM-NEXT: sd a2, 48(sp) +; LP64E-FPELIM-NEXT: sd a1, 40(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 48 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: sd a0, 0(sp) +; LP64E-FPELIM-NEXT: call notdead@plt +; LP64E-FPELIM-NEXT: ld a0, 8(sp) +; LP64E-FPELIM-NEXT: addi a0, a0, 3 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a1, a0, 8 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: ld a1, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a2, a0, 8 +; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: ld a2, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a3, a0, 8 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: ld a0, 0(a0) +; LP64E-FPELIM-NEXT: add a1, a1, s0 +; LP64E-FPELIM-NEXT: add a1, a1, a2 +; LP64E-FPELIM-NEXT: addw a0, a1, a0 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va4_va_copy: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -88 +; LP64E-WITHFP-NEXT: sd ra, 32(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 40 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd a0, -32(s0) +; LP64E-WITHFP-NEXT: sd a0, -40(s0) +; LP64E-WITHFP-NEXT: call notdead@plt +; LP64E-WITHFP-NEXT: ld a0, -32(s0) +; LP64E-WITHFP-NEXT: addi a0, a0, 3 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a1, a0, 8 +; LP64E-WITHFP-NEXT: sd a1, -32(s0) +; LP64E-WITHFP-NEXT: ld a1, 
0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a2, a0, 8 +; LP64E-WITHFP-NEXT: sd a2, -32(s0) +; LP64E-WITHFP-NEXT: ld a2, 0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a3, a0, 8 +; LP64E-WITHFP-NEXT: sd a3, -32(s0) +; LP64E-WITHFP-NEXT: ld a0, 0(a0) +; LP64E-WITHFP-NEXT: add a1, a1, s1 +; LP64E-WITHFP-NEXT: add a1, a1, a2 +; LP64E-WITHFP-NEXT: addw a0, a1, a0 +; LP64E-WITHFP-NEXT: ld ra, 32(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s1, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 88 +; LP64E-WITHFP-NEXT: ret %vargs = alloca ptr %wargs = alloca ptr call void @llvm.va_start(ptr %vargs) @@ -1498,6 +2424,104 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va5_aligned_stack_caller: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -64 +; ILP32E-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-FPELIM-NEXT: addi s0, sp, 64 +; ILP32E-FPELIM-NEXT: andi sp, sp, -16 +; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: sw a0, 24(sp) +; ILP32E-FPELIM-NEXT: li a0, 16 +; ILP32E-FPELIM-NEXT: sw a0, 20(sp) +; ILP32E-FPELIM-NEXT: li a0, 15 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262236 +; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lui a0, 377487 +; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: li a0, 14 +; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: li a0, 4 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a0, a0, 491 +; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; 
ILP32E-FPELIM-NEXT: addi a0, a0, -1967 +; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi a0, a0, -328 +; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a0, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 +; ILP32E-FPELIM-NEXT: li a0, 1 +; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a2, sp, 32 +; ILP32E-FPELIM-NEXT: li a3, 12 +; ILP32E-FPELIM-NEXT: li a4, 13 +; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee@plt +; ILP32E-FPELIM-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-FPELIM-NEXT: addi sp, sp, 64 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va5_aligned_stack_caller: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -64 +; ILP32E-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 64 +; ILP32E-WITHFP-NEXT: andi sp, sp, -16 +; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: sw a0, 24(sp) +; ILP32E-WITHFP-NEXT: li a0, 16 +; ILP32E-WITHFP-NEXT: sw a0, 20(sp) +; ILP32E-WITHFP-NEXT: li a0, 15 +; ILP32E-WITHFP-NEXT: sw a0, 16(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262236 +; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: lui a0, 377487 +; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: sw a0, 8(sp) +; ILP32E-WITHFP-NEXT: li a0, 14 +; ILP32E-WITHFP-NEXT: sw a0, 4(sp) +; ILP32E-WITHFP-NEXT: li a0, 4 +; ILP32E-WITHFP-NEXT: sw a0, 0(sp) +; ILP32E-WITHFP-NEXT: lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a0, a0, 491 +; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 +; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: lui a0, 
964690 +; ILP32E-WITHFP-NEXT: addi a0, a0, -328 +; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a0, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 +; ILP32E-WITHFP-NEXT: li a0, 1 +; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a2, sp, 32 +; ILP32E-WITHFP-NEXT: li a3, 12 +; ILP32E-WITHFP-NEXT: li a4, 13 +; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee@plt +; ILP32E-WITHFP-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 64 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 @@ -1560,6 +2584,73 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 48 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va5_aligned_stack_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -56 +; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a0, 17 +; LP64E-FPELIM-NEXT: sd a0, 40(sp) +; LP64E-FPELIM-NEXT: li a0, 16 +; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-FPELIM-NEXT: sd a0, 32(sp) +; LP64E-FPELIM-NEXT: li a0, 15 +; LP64E-FPELIM-NEXT: sd a0, 24(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: li a0, 14 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 2384 +; LP64E-FPELIM-NEXT: addiw a0, a0, 761 +; LP64E-FPELIM-NEXT: slli a6, a0, 11 +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-FPELIM-NEXT: li a0, 1 +; LP64E-FPELIM-NEXT: li a1, 11 +; 
LP64E-FPELIM-NEXT: li a4, 12 +; LP64E-FPELIM-NEXT: li a5, 13 +; LP64E-FPELIM-NEXT: sd a6, 0(sp) +; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee@plt +; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 56 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va5_aligned_stack_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 64 +; LP64E-WITHFP-NEXT: li a0, 17 +; LP64E-WITHFP-NEXT: sd a0, 40(sp) +; LP64E-WITHFP-NEXT: li a0, 16 +; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-WITHFP-NEXT: sd a0, 32(sp) +; LP64E-WITHFP-NEXT: li a0, 15 +; LP64E-WITHFP-NEXT: sd a0, 24(sp) +; LP64E-WITHFP-NEXT: sd a1, 16(sp) +; LP64E-WITHFP-NEXT: li a0, 14 +; LP64E-WITHFP-NEXT: sd a0, 8(sp) +; LP64E-WITHFP-NEXT: lui a0, 2384 +; LP64E-WITHFP-NEXT: addiw a0, a0, 761 +; LP64E-WITHFP-NEXT: slli a6, a0, 11 +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-WITHFP-NEXT: li a0, 1 +; LP64E-WITHFP-NEXT: li a1, 11 +; LP64E-WITHFP-NEXT: li a4, 12 +; LP64E-WITHFP-NEXT: li a5, 13 +; LP64E-WITHFP-NEXT: sd a6, 0(sp) +; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee@plt +; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret %1 = call i32 (i32, ...) 
@va5_aligned_stack_callee(i32 1, i32 11, fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, i64 20000000000, i32 14, double 2.720000e+00, i32 15, [2 x i32] [i32 16, i32 17]) @@ -1624,6 +2715,39 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va6_no_fixed_args: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: addi sp, sp, -32 +; ILP32E-FPELIM-NEXT: sw a5, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-NEXT: sw a0, 8(sp) +; ILP32E-FPELIM-NEXT: addi a1, sp, 12 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va6_no_fixed_args: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -48 +; ILP32E-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 4 +; ILP32E-WITHFP-NEXT: sw a1, -12(s0) +; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 @@ -1660,6 +2784,39 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va6_no_fixed_args: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; 
LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: sd a0, 16(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 24 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va6_no_fixed_args: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a0, 0(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 8 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret %va = alloca ptr call void @llvm.va_start(ptr %va) %1 = va_arg ptr %va, i32 @@ -1777,6 +2934,68 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add sp, sp, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; +; ILP32E-FPELIM-LABEL: va_large_stack: +; ILP32E-FPELIM: # %bb.0: +; ILP32E-FPELIM-NEXT: lui a0, 24414 +; ILP32E-FPELIM-NEXT: addi a0, a0, 288 +; ILP32E-FPELIM-NEXT: sub sp, sp, a0 +; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 100000032 +; ILP32E-FPELIM-NEXT: mv a0, a1 +; ILP32E-FPELIM-NEXT: lui a6, 24414 +; ILP32E-FPELIM-NEXT: add a6, sp, a6 +; ILP32E-FPELIM-NEXT: sw a5, 284(a6) +; ILP32E-FPELIM-NEXT: lui a5, 24414 +; ILP32E-FPELIM-NEXT: add a5, sp, a5 +; ILP32E-FPELIM-NEXT: sw a4, 280(a5) +; ILP32E-FPELIM-NEXT: lui a4, 24414 +; ILP32E-FPELIM-NEXT: add a4, sp, a4 +; ILP32E-FPELIM-NEXT: sw a3, 276(a4) +; ILP32E-FPELIM-NEXT: lui a3, 24414 +; ILP32E-FPELIM-NEXT: add a3, sp, a3 +; ILP32E-FPELIM-NEXT: sw a2, 272(a3) +; ILP32E-FPELIM-NEXT: lui 
a2, 24414 +; ILP32E-FPELIM-NEXT: add a2, sp, a2 +; ILP32E-FPELIM-NEXT: sw a1, 268(a2) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: addi a1, a1, 272 +; ILP32E-FPELIM-NEXT: add a1, sp, a1 +; ILP32E-FPELIM-NEXT: sw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: addi a1, a1, 288 +; ILP32E-FPELIM-NEXT: add sp, sp, a1 +; ILP32E-FPELIM-NEXT: ret +; +; ILP32E-WITHFP-LABEL: va_large_stack: +; ILP32E-WITHFP: # %bb.0: +; ILP32E-WITHFP-NEXT: addi sp, sp, -2044 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa_offset 2044 +; ILP32E-WITHFP-NEXT: sw ra, 2016(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: sw s0, 2012(sp) # 4-byte Folded Spill +; ILP32E-WITHFP-NEXT: .cfi_offset ra, -28 +; ILP32E-WITHFP-NEXT: .cfi_offset s0, -32 +; ILP32E-WITHFP-NEXT: addi s0, sp, 2020 +; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 +; ILP32E-WITHFP-NEXT: lui a0, 24414 +; ILP32E-WITHFP-NEXT: addi a0, a0, -1740 +; ILP32E-WITHFP-NEXT: sub sp, sp, a0 +; ILP32E-WITHFP-NEXT: mv a0, a1 +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: addi a1, s0, 8 +; ILP32E-WITHFP-NEXT: lui a2, 24414 +; ILP32E-WITHFP-NEXT: sub a2, s0, a2 +; ILP32E-WITHFP-NEXT: sw a1, -272(a2) +; ILP32E-WITHFP-NEXT: lui a1, 24414 +; ILP32E-WITHFP-NEXT: addi a1, a1, -1740 +; ILP32E-WITHFP-NEXT: add sp, sp, a1 +; ILP32E-WITHFP-NEXT: lw ra, 2016(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: lw s0, 2012(sp) # 4-byte Folded Reload +; ILP32E-WITHFP-NEXT: addi sp, sp, 2044 +; ILP32E-WITHFP-NEXT: ret +; ; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 @@ -1848,6 +3067,70 @@ ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 1952(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 2032 ; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va_large_stack: +; LP64E-FPELIM: # 
%bb.0: +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 320 +; LP64E-FPELIM-NEXT: sub sp, sp, a0 +; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064 +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a1, 280(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a5, 312(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a4, 304(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a3, 296(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a2, 288(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 284 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: lw a0, 280(a0) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: addiw a1, a1, 320 +; LP64E-FPELIM-NEXT: add sp, sp, a1 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va_large_stack: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -2040 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 2040 +; LP64E-WITHFP-NEXT: sd ra, 1984(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 1976(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 1992 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: lui a0, 24414 +; LP64E-WITHFP-NEXT: addiw a0, a0, -1704 +; LP64E-WITHFP-NEXT: sub sp, sp, a0 +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: sub a1, s0, a1 +; LP64E-WITHFP-NEXT: sd a0, 
-288(a1) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: addiw a1, a1, -1704 +; LP64E-WITHFP-NEXT: add sp, sp, a1 +; LP64E-WITHFP-NEXT: ld ra, 1984(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 1976(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 2040 +; LP64E-WITHFP-NEXT: ret %large = alloca [ 100000000 x i8 ] %va = alloca ptr call void @llvm.va_start(ptr %va) diff --git a/llvm/test/MC/RISCV/option-invalid.s b/llvm/test/MC/RISCV/option-invalid.s --- a/llvm/test/MC/RISCV/option-invalid.s +++ b/llvm/test/MC/RISCV/option-invalid.s @@ -56,9 +56,6 @@ # CHECK: :[[#@LINE+1]]:12: warning: unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'arch', 'relax' or 'norelax' .option bar -# CHECK: :[[#@LINE+1]]:16: error: unknown extension feature -.option arch, -i - # CHECK: :[[#@LINE+1]]:12: error: .option pop with no .option push .option pop diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s --- a/llvm/test/MC/RISCV/target-abi-invalid.s +++ b/llvm/test/MC/RISCV/target-abi-invalid.s @@ -30,7 +30,7 @@ # RUN: | FileCheck -check-prefix=RV32E-LP64 %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-LP64F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s # RUN: llvm-mc -triple=riscv32 -mattr=+e -target-abi lp64e %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32E-LP64E %s @@ -42,6 +42,7 @@ # RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32E-LP64E: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) +# RV32EFD-LP64D: LLVM ERROR: ILP32E must not be used with the D ISA extension # RUN: llvm-mc 
-triple=riscv32 -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32I-ILP32F %s @@ -69,15 +70,17 @@ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+f -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EF-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32F %s -# RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ +# RUN: not --crash llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi ilp32d < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-ILP32D %s # RV32E-ILP32: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EF-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EFD-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32F: LLVM ERROR: ILP32E must not be used with the D ISA extension # RV32EFD-ILP32D: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RV32EFD-ILP32D: LLVM ERROR: ILP32E must not be used with the D ISA extension # RUN: llvm-mc -triple=riscv64 -mattr=+e -target-abi lp64 < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV64EF-LP64F %s