diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -577,7 +577,8 @@ // Mask VTs are custom-expanded into a series of standard nodes setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, - ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, + ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, + ISD::SCALAR_TO_VECTOR}, VT, Custom); setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, @@ -703,9 +704,9 @@ ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom); - setOperationAction( - {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, - VT, Custom); + setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, + VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); @@ -798,9 +799,9 @@ setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction( - {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, - VT, Custom); + setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, + VT, Custom); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); @@ -877,6 +878,8 @@ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); @@ -4138,6 +4141,29 @@ return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::SCALAR_TO_VECTOR: { + MVT VT = Op.getSimpleValueType(); + SDLoc DL(Op); + SDValue Scalar = Op.getOperand(0); + // There is no vmv.s.x equivalent for mask vectors, so insert the scalar + // into an i8 vector of the same shape and truncate back down to i1. + if (VT.getVectorElementType() == MVT::i1) { + MVT WideVT = VT.changeVectorElementType(MVT::i8); + SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); + return DAG.getNode(ISD::TRUNCATE, DL, VT, V); + } + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) + ContainerVT = getContainerForFixedLengthVector(VT); + SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + // Insert the scalar into element 0 of an undef vector with vmv.s.x; the + // remaining elements are left undefined. + SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), Scalar, VL); + if (VT.isFixedLengthVector()) + V = convertFromScalableVector(VT, V, DAG, Subtarget); + return V; + } case ISD::VSCALE: { MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -7,8 +7,6 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-LABEL: load_large_vector: ; ZVE32X: # %bb.0: -; ZVE32X-NEXT: addi sp, sp, -16 -; ZVE32X-NEXT: .cfi_def_cfa_offset 16 ; ZVE32X-NEXT: ld a1, 80(a0) ; ZVE32X-NEXT: ld a2, 72(a0) ; ZVE32X-NEXT: ld a3, 56(a0) @@ -19,49 +17,47 @@ ; ZVE32X-NEXT: ld a0, 0(a0) ; ZVE32X-NEXT: xor a4, a5, a4 ; ZVE32X-NEXT: snez a4, a4 -; ZVE32X-NEXT: sb a4, 12(sp) +; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vmv.s.x v8, a4 +; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vmsne.vi v0, v8, 0 +; ZVE32X-NEXT: vmv.v.i 
v8, 0 +; ZVE32X-NEXT: vmerge.vim v9, v8, 1, v0 ; ZVE32X-NEXT: xor a0, a0, a7 ; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: sb a0, 15(sp) -; ZVE32X-NEXT: xor a0, a6, a3 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: sb a0, 13(sp) -; ZVE32X-NEXT: xor a1, a2, a1 -; ZVE32X-NEXT: snez a0, a1 -; ZVE32X-NEXT: sb a0, 14(sp) -; ZVE32X-NEXT: addi a0, sp, 12 -; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vlm.v v0, (a0) -; ZVE32X-NEXT: addi a0, sp, 15 -; ZVE32X-NEXT: vlm.v v8, (a0) -; ZVE32X-NEXT: vmv.v.i v9, 0 -; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0 +; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: vand.vi v10, v10, 1 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmv.v.i v11, 0 -; ZVE32X-NEXT: vmv1r.v v0, v8 -; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0 +; ZVE32X-NEXT: vmv.v.i v10, 0 +; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v8, v10, 1 +; ZVE32X-NEXT: vslideup.vi v11, v9, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0 -; ZVE32X-NEXT: addi a0, sp, 13 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 +; ZVE32X-NEXT: xor a0, a6, a3 +; ZVE32X-NEXT: snez a0, a0 +; ZVE32X-NEXT: vmv.s.x v11, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vlm.v v0, (a0) -; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0 +; ZVE32X-NEXT: vand.vi v11, v11, 1 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v8, v10, 2 +; ZVE32X-NEXT: vslideup.vi v9, v11, 2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: vmerge.vim v8, v11, 1, v0 -; ZVE32X-NEXT: addi a0, sp, 14 +; ZVE32X-NEXT: vmsne.vi v0, v9, 0 +; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 +; ZVE32X-NEXT: xor a1, a2, a1 +; ZVE32X-NEXT: snez a0, a1 +; ZVE32X-NEXT: vmv.s.x v10, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vlm.v v0, (a0) -; ZVE32X-NEXT: vmerge.vim v9, v9, 1, v0 +; ZVE32X-NEXT: vand.vi v10, v10, 1 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: vmerge.vim v8, v8, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vslideup.vi v8, v9, 3 -; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: addi sp, sp, 16 +; ZVE32X-NEXT: vslideup.vi v9, v8, 3 +; ZVE32X-NEXT: vmsne.vi v0, v9, 0 ; ZVE32X-NEXT: ret ; ; ZVE64X-LABEL: load_large_vector: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -11,14 +11,8 @@ ; ; RV64-LABEL: load_v5i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %x = load <5 x i8>, ptr %p ret <5 x i8> %x @@ -39,10 +33,8 @@ ; RV32-NEXT: slli a4, a4, 24 ; RV32-NEXT: or a3, a4, a3 ; RV32-NEXT: or a1, a3, a1 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: vslidedown.vi v10, v8, 2 @@ -76,10 
+68,8 @@ ; RV64-NEXT: slli a4, a4, 24 ; RV64-NEXT: or a3, a4, a3 ; RV64-NEXT: or a1, a3, a1 -; RV64-NEXT: sw a1, 0(sp) -; RV64-NEXT: mv a1, sp ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vle32.v v8, (a1) +; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64-NEXT: vslidedown.vi v9, v8, 1 ; RV64-NEXT: vslidedown.vi v10, v8, 2 @@ -111,14 +101,8 @@ ; ; RV64-LABEL: load_v6i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %x = load <6 x i8>, ptr %p ret <6 x i8> %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -220,35 +220,20 @@ ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 ; RV32ZVE32F-NEXT: lw a1, 8(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: sb a1, 15(sp) -; RV32ZVE32F-NEXT: sb a0, 14(sp) -; RV32ZVE32F-NEXT: addi a0, sp, 15 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV32ZVE32F-NEXT: vle8.v v9, (a0) -; RV32ZVE32F-NEXT: addi a0, sp, 14 -; RV32ZVE32F-NEXT: vle8.v v10, (a0) ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32ZVE32F-NEXT: vmv.s.x v9, a1 +; RV32ZVE32F-NEXT: vmv.s.x v10, a0 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t -; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi sp, sp, -16 -; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV64ZVE32F-NEXT: sb a1, 15(sp) -; RV64ZVE32F-NEXT: sb a0, 14(sp) -; RV64ZVE32F-NEXT: addi a0, sp, 15 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vle8.v v9, (a0) -; RV64ZVE32F-NEXT: addi a0, sp, 14 -; RV64ZVE32F-NEXT: vle8.v v8, (a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a1 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 ; RV64ZVE32F-NEXT: andi a1, a0, 1 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -257,7 +242,6 @@ ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB4_4 ; RV64ZVE32F-NEXT: .LBB4_2: # %else2 -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -268,7 +252,6 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v8, (a3) -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i8> call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m) @@ -782,35 +765,20 @@ ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -16 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 ; RV32ZVE32F-NEXT: lw a1, 8(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: sh a1, 14(sp) -; RV32ZVE32F-NEXT: sh a0, 12(sp) -; RV32ZVE32F-NEXT: addi a0, sp, 14 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV32ZVE32F-NEXT: vle16.v v9, (a0) -; RV32ZVE32F-NEXT: 
addi a0, sp, 12 -; RV32ZVE32F-NEXT: vle16.v v10, (a0) ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV32ZVE32F-NEXT: vmv.s.x v9, a1 +; RV32ZVE32F-NEXT: vmv.s.x v10, a0 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t -; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi sp, sp, -16 -; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV64ZVE32F-NEXT: sh a1, 14(sp) -; RV64ZVE32F-NEXT: sh a0, 12(sp) -; RV64ZVE32F-NEXT: addi a0, sp, 14 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vle16.v v9, (a0) -; RV64ZVE32F-NEXT: addi a0, sp, 12 -; RV64ZVE32F-NEXT: vle16.v v8, (a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a1 +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 @@ -820,7 +788,6 @@ ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB13_4 ; RV64ZVE32F-NEXT: .LBB13_2: # %else2 -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -831,7 +798,6 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vse16.v v8, (a3) -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i16> call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -445,63 +445,72 @@ ; ; CHECK-V-LABEL: stest_f16i32: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli 
zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 ; CHECK-V-NEXT: addiw a1, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a1 +; CHECK-V-NEXT: vmin.vx v8, v10, a1 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -595,62 +604,71 @@ ; ; CHECK-V-LABEL: utesth_f16i32: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 
0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vminu.vx v10, v8, a0 +; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -754,63 +772,72 @@ ; ; CHECK-V-LABEL: ustest_f16i32: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 
8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: 
ld s2, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -1360,16 +1387,16 @@ ; ; CHECK-V-LABEL: stest_f16i16: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -1378,6 +1405,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -1388,88 +1419,97 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) +; 
CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi 
v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -1635,16 +1675,16 @@ ; ; CHECK-V-LABEL: utesth_f16i16: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -1653,6 +1693,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -1663,86 +1707,95 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) +; CHECK-V-NEXT: vsetivli 
zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, 
ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vminu.vx v10, v8, a0 +; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -1930,16 +1983,16 @@ ; ; CHECK-V-LABEL: ustest_f16i16: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -1948,6 +2001,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: 
.cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -1958,87 +2015,96 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call 
__extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -2134,19 +2200,19 @@ ; ; CHECK-V-LABEL: stest_f64i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: 
sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2155,7 +2221,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt @@ -2211,22 +2277,17 @@ ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i128> @@ -2273,19 +2334,19 @@ ; ; CHECK-V-LABEL: utest_f64i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2294,7 +2355,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; 
CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt @@ -2304,22 +2365,17 @@ ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a2, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x double> %x to <2 x i128> @@ -2390,19 +2446,19 @@ ; ; CHECK-V-LABEL: ustest_f64i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2411,7 +2467,7 @@ ; CHECK-V-NEXT: mv s1, a0 ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt @@ -2448,22 +2504,17 @@ ; CHECK-V-NEXT: and a2, a2, a3 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a2, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 
72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i128> @@ -2557,19 +2608,19 @@ ; ; CHECK-V-LABEL: stest_f32i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2578,7 +2629,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt @@ -2634,22 +2685,17 @@ ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -2696,19 +2742,19 @@ ; ; CHECK-V-LABEL: utest_f32i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; 
CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2717,7 +2763,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt @@ -2727,22 +2773,17 @@ ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a2, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x float> %x to <2 x i128> @@ -2813,19 +2854,19 @@ ; ; CHECK-V-LABEL: ustest_f32i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -2834,7 +2875,7 @@ ; CHECK-V-NEXT: mv s1, a0 ; CHECK-V-NEXT: mv s0, a1 ; 
CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt @@ -2871,22 +2912,17 @@ ; CHECK-V-NEXT: and a2, a2, a3 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a2, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -2982,12 +3018,12 @@ ; ; CHECK-V-LABEL: stest_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -48 -; CHECK-V-NEXT: .cfi_def_cfa_offset 48 -; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -32 +; CHECK-V-NEXT: .cfi_def_cfa_offset 32 +; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -3051,20 +3087,15 @@ ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB24_16: # %entry -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: sd s0, 0(sp) -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v9, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 32 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -3113,12 +3144,12 @@ ; ; CHECK-V-LABEL: utesth_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -48 -; CHECK-V-NEXT: .cfi_def_cfa_offset 48 -; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, 
-32 +; CHECK-V-NEXT: .cfi_def_cfa_offset 32 +; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -3137,20 +3168,15 @@ ; CHECK-V-NEXT: and a2, a2, s1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: sd a2, 0(sp) -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v9, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 32 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x half> %x to <2 x i128> @@ -3223,12 +3249,12 @@ ; ; CHECK-V-LABEL: ustest_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -48 -; CHECK-V-NEXT: .cfi_def_cfa_offset 48 -; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -32 +; CHECK-V-NEXT: .cfi_def_cfa_offset 32 +; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -3274,20 +3300,15 @@ ; CHECK-V-NEXT: and a2, a2, a3 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: sd a2, 0(sp) -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v9, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 32 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -3732,63 +3753,72 @@ ; ; CHECK-V-LABEL: stest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: 
sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 ; CHECK-V-NEXT: addiw a1, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a1 +; CHECK-V-NEXT: vmin.vx v8, v10, a1 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded 
Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -3880,62 +3910,71 @@ ; ; CHECK-V-LABEL: utesth_f16i32_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: 
vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vminu.vx v10, v8, a0 +; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -4038,63 +4077,72 @@ ; ; CHECK-V-LABEL: ustest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -64 -; CHECK-V-NEXT: .cfi_def_cfa_offset 64 -; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; 
CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sd a0, 8(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 64 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -4632,16 +4680,16 @@ ; ; CHECK-V-LABEL: stest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -4650,6 +4698,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu 
s2, 40(a0) @@ -4660,88 +4712,97 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 
-; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -4903,16 +4964,16 @@ ; ; CHECK-V-LABEL: utesth_f16i16_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 
64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -4921,6 +4982,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -4931,86 +4996,95 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) -; CHECK-V-NEXT: mv a0, s5 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) -; CHECK-V-NEXT: mv a0, s4 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) -; CHECK-V-NEXT: mv a0, s3 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) -; CHECK-V-NEXT: mv a0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) -; CHECK-V-NEXT: mv a0, s1 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) -; CHECK-V-NEXT: mv a0, s0 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 24 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s5 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s4 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: 
vslideup.vi v8, v10, 3 ; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s3 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s1 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: mv a0, s0 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vminu.vx v10, v8, a0 +; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, 
sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -5197,16 +5271,16 @@ ; ; CHECK-V-LABEL: ustest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -96 -; CHECK-V-NEXT: .cfi_def_cfa_offset 96 -; CHECK-V-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -80 +; CHECK-V-NEXT: .cfi_def_cfa_offset 80 +; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 @@ -5215,6 +5289,10 @@ ; CHECK-V-NEXT: .cfi_offset s4, -48 ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -5225,87 +5303,96 @@ ; CHECK-V-NEXT: lhu a0, 8(a0) ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 28(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 0(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 24(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 20(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: 
fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 16(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 12(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 8(sp) +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv a0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: sw a0, 4(sp) -; CHECK-V-NEXT: addi a0, sp, 28 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: mv a0, sp -; CHECK-V-NEXT: vle32.v v8, (a0) -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 2 -; CHECK-V-NEXT: addi a0, sp, 20 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 3 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 4 -; CHECK-V-NEXT: addi a0, sp, 12 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 5 -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) -; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 6 -; CHECK-V-NEXT: addi a0, sp, 4 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vle32.v v10, (a0) ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-V-NEXT: vslideup.vi v8, v10, 7 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vmin.vx v8, v8, a0 +; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 -; CHECK-V-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 72(sp) # 
8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 96 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 80 ; CHECK-V-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -5425,19 +5512,19 @@ ; ; CHECK-V-LABEL: stest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -5446,7 +5533,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt @@ -5491,22 +5578,17 @@ ; CHECK-V-NEXT: .LBB45_13: # %entry ; CHECK-V-NEXT: mv a0, a4 ; CHECK-V-NEXT: .LBB45_14: # %entry -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd s0, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; 
CHECK-V-NEXT: ret ; CHECK-V-NEXT: .LBB45_15: # %entry ; CHECK-V-NEXT: mv a4, a3 @@ -5595,19 +5677,19 @@ ; ; CHECK-V-LABEL: utest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 @@ -5616,7 +5698,7 @@ ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-V-NEXT: addi a0, sp, 48 +; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt @@ -5634,22 +5716,17 @@ ; CHECK-V-NEXT: seqz a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: sd a0, 24(sp) -; CHECK-V-NEXT: sd a2, 32(sp) -; CHECK-V-NEXT: addi a0, sp, 24 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vle64.v v8, (a0) -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 -; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 80 +; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 64 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x double> %x to <2 x i128> @@ -5722,19 +5799,19 @@ ; ; CHECK-V-LABEL: ustest_f64i64_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -80 -; CHECK-V-NEXT: .cfi_def_cfa_offset 80 -; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -64 +; CHECK-V-NEXT: .cfi_def_cfa_offset 64 +; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 
0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5743,7 +5820,7 @@
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti@plt
@@ -5782,22 +5859,17 @@
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB47_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a1
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
  %conv = fptosi <2 x double> %x to <2 x i128>
@@ -5915,19 +5987,19 @@
;
; CHECK-V-LABEL: stest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -5936,7 +6008,7 @@
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -5981,22 +6053,17 @@
; CHECK-V-NEXT: .LBB48_13: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB48_14: # %entry
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd s0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
; CHECK-V-NEXT: .LBB48_15: # %entry
; CHECK-V-NEXT: mv a4, a3
@@ -6085,19 +6152,19 @@
;
; CHECK-V-LABEL: utest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -6106,7 +6173,7 @@
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti@plt
@@ -6124,22 +6191,17 @@
; CHECK-V-NEXT: seqz a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
-; CHECK-V-NEXT: sd a0, 24(sp)
-; CHECK-V-NEXT: sd a2, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
  %conv = fptoui <2 x float> %x to <2 x i128>
@@ -6212,19 +6274,19 @@
;
; CHECK-V-LABEL: ustest_f32i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -80
-; CHECK-V-NEXT: .cfi_def_cfa_offset 80
-; CHECK-V-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -64
+; CHECK-V-NEXT: .cfi_def_cfa_offset 64
+; CHECK-V-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
@@ -6233,7 +6295,7 @@
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-V-NEXT: addi a0, sp, 48
+; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti@plt
@@ -6272,22 +6334,17 @@
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB50_8: # %entry
-; CHECK-V-NEXT: sd a1, 24(sp)
-; CHECK-V-NEXT: sd a0, 32(sp)
-; CHECK-V-NEXT: addi a0, sp, 24
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v8, (a0)
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a1
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
-; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 80
+; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 64
; CHECK-V-NEXT: ret
entry:
  %conv = fptosi <2 x float> %x to <2 x i128>
@@ -6407,12 +6464,12 @@
;
; CHECK-V-LABEL: stest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6466,20 +6523,15 @@
; CHECK-V-NEXT: .LBB51_13: # %entry
; CHECK-V-NEXT: mv a0, a4
; CHECK-V-NEXT: .LBB51_14: # %entry
-; CHECK-V-NEXT: sd a0, 8(sp)
-; CHECK-V-NEXT: sd s0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a0
+; CHECK-V-NEXT: vmv.s.x v8, s0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
; CHECK-V-NEXT: .LBB51_15: # %entry
; CHECK-V-NEXT: mv a4, a3
@@ -6570,12 +6622,12 @@
;
; CHECK-V-LABEL: utesth_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6603,20 +6655,15 @@
; CHECK-V-NEXT: seqz a2, s2
; CHECK-V-NEXT: addi a2, a2, -1
; CHECK-V-NEXT: and a1, a2, a1
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a1
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
  %conv = fptoui <2 x half> %x to <2 x i128>
@@ -6691,12 +6738,12 @@
;
; CHECK-V-LABEL: ustest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -48
-; CHECK-V-NEXT: .cfi_def_cfa_offset 48
-; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -32
+; CHECK-V-NEXT: .cfi_def_cfa_offset 32
+; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-V-NEXT: .cfi_offset ra, -8
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
@@ -6745,20 +6792,15 @@
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: and a1, a2, a1
; CHECK-V-NEXT: .LBB53_8: # %entry
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
-; CHECK-V-NEXT: mv a0, sp
-; CHECK-V-NEXT: vle64.v v8, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v9, a1
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 32
; CHECK-V-NEXT: ret
entry:
  %conv = fptosi <2 x half> %x to <2 x i128>