diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1515,6 +1515,7 @@
     ReplaceNode(Node, Extract.getNode());
     return;
   }
+  case ISD::SPLAT_VECTOR:
   case RISCVISD::VMV_V_X_VL:
   case RISCVISD::VFMV_V_F_VL: {
     // Try to match splat of a scalar load to a strided load with stride of x0.
@@ -1531,7 +1532,10 @@
       break;
 
     SDValue VL;
-    selectVLOp(Node->getOperand(1), VL);
+    if (Node->getOpcode() == ISD::SPLAT_VECTOR)
+      VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
+    else
+      selectVLOp(Node->getOperand(1), VL);
 
     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
 
@@ -38,12 +38,12 @@
 define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) {
 ; CHECK-LABEL: vreduce_fadd_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a1)
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v9, (a0), zero
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
@@ -75,12 +75,12 @@
 define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) {
 ; CHECK-LABEL: vreduce_fadd_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    flh ft0, %lo(.LCPI4_0)(a1)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v9, (a0), zero
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
@@ -112,12 +112,12 @@
 define half @vreduce_fadd_v8f16(<8 x half>* %x, half %s) {
 ; CHECK-LABEL: vreduce_fadd_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
-; CHECK-NEXT:    flh ft0, %lo(.LCPI6_0)(a1)
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v9, (a0), zero
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
@@ -149,12 +149,12 @@
 define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) {
 ; CHECK-LABEL: vreduce_fadd_v16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI8_0)
-; CHECK-NEXT:    flh ft0, %lo(.LCPI8_0)(a1)
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, ft0
+; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v10, (a0), zero
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
@@ -184,35 +184,20 @@
 declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
 
 define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) {
-; RV32-LABEL: vreduce_fadd_v32f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI10_0)(a2)
-; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfmv.v.f v12, ft0
-; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV32-NEXT:    vfredusum.vs v8, v8, v12
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fadd.h fa0, fa0, ft0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vreduce_fadd_v32f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI10_0)(a1)
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfmv.v.f v12, ft0
-; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV64-NEXT:    vfredusum.vs v8, v8, v12
-; RV64-NEXT:    vfmv.f.s ft0, v8
-; RV64-NEXT:    fadd.h fa0, fa0, ft0
-; RV64-NEXT:    ret
+; CHECK-LABEL: vreduce_fadd_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v12, (a0), zero
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vfredusum.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
+; CHECK-NEXT:    ret
   %v = load <32 x half>, <32 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
   ret half %red
@@ -238,35 +223,20 @@
 declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
 
 define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) {
-; RV32-LABEL: vreduce_fadd_v64f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:    lui a2, %hi(.LCPI12_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI12_0)(a2)
-; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfmv.v.f v16, ft0
-; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; RV32-NEXT:    vfredusum.vs v8, v8, v16
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    fadd.h fa0, fa0, ft0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vreduce_fadd_v64f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV64-NEXT: flh ft0, %lo(.LCPI12_0)(a1) -; RV64-NEXT: li a1, 64 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; RV64-NEXT: vfmv.v.f v16, ft0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; RV64-NEXT: vfredusum.vs v8, v8, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fadd.h fa0, fa0, ft0 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fadd_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret %v = load <64 x half>, <64 x half>* %x %red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v) ret half %red @@ -299,11 +269,11 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -374,12 +344,12 @@ define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_fadd_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI18_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -411,12 +381,12 @@ define float @vreduce_fadd_v4f32(<4 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_fadd_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI20_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI20_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -448,12 +418,12 @@ define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_fadd_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI22_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI22_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 8, 
e32, m2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -485,12 +455,12 @@ define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_fadd_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v12, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI24_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI24_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v12, (a0), zero ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -520,35 +490,20 @@ declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) { -; RV32-LABEL: vreduce_fadd_v32f32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: lui a2, %hi(.LCPI26_0) -; RV32-NEXT: flw ft0, %lo(.LCPI26_0)(a2) -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; RV32-NEXT: vfmv.v.f v16, ft0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vfredusum.vs v8, v8, v16 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fadd.s fa0, fa0, ft0 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fadd_v32f32: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI26_0) -; RV64-NEXT: flw ft0, %lo(.LCPI26_0)(a1) -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; RV64-NEXT: vfmv.v.f v16, ft0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-NEXT: vfredusum.vs v8, v8, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fadd.s fa0, fa0, ft0 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fadd_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, %hi(.LCPI26_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI26_0) +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret %v = load <32 x float>, <32 x float>* %x %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v) ret float %red @@ -581,11 +536,11 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI28_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI28_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI28_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0) +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -656,12 +611,12 @@ define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) { ; CHECK-LABEL: vreduce_fadd_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, 
%hi(.LCPI32_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -693,12 +648,12 @@ define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) { ; CHECK-LABEL: vreduce_fadd_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -730,12 +685,12 @@ define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) { ; CHECK-LABEL: vreduce_fadd_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI36_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v12, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI36_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v12, (a0), zero ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -767,12 +722,12 @@ define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) { ; CHECK-LABEL: vreduce_fadd_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI38_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI38_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -808,11 +763,11 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI40_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -851,12 +806,12 @@ define half @vreduce_fmin_v2f16(<2 x half>* %x) { ; CHECK-LABEL: vreduce_fmin_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI42_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI42_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -871,12 +826,12 
@@ define half @vreduce_fmin_v4f16(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -889,12 +844,12 @@ define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f16_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI44_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI44_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI44_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -907,12 +862,12 @@ define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI45_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI45_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI45_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -932,11 +887,11 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI46_0)(a0) ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI46_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -951,12 +906,12 @@ define float @vreduce_fmin_v2f32(<2 x float>* %x) { ; CHECK-LABEL: vreduce_fmin_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI47_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI47_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI47_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -971,12 +926,12 @@ define float @vreduce_fmin_v4f32(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI48_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI48_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, 
zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI48_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -989,12 +944,12 @@ define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI49_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI49_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI49_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1007,12 +962,12 @@ define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI50_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI50_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI50_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1029,20 +984,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: addi a0, a0, 256 -; CHECK-NEXT: vle32.v v0, (a0) -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vfmin.vv v16, v24, v16 +; CHECK-NEXT: vfmin.vv v8, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI51_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI51_0)(a0) -; CHECK-NEXT: vfmin.vv v16, v24, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1057,12 +1012,12 @@ define double @vreduce_fmin_v2f64(<2 x double>* %x) { ; CHECK-LABEL: vreduce_fmin_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI52_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI52_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI52_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1077,12 +1032,12 @@ define double @vreduce_fmin_v4f64(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f64: ; 
CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI53_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI53_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI53_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1095,12 +1050,12 @@ define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI54_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI54_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI54_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1113,12 +1068,12 @@ define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI55_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI55_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI55_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1137,11 +1092,11 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI56_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI56_0)(a0) ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI56_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1156,12 +1111,12 @@ define half @vreduce_fmax_v2f16(<2 x half>* %x) { ; CHECK-LABEL: vreduce_fmax_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI57_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI57_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI57_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1176,12 +1131,12 @@ define half @vreduce_fmax_v4f16(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI58_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI58_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui 
a0, %hi(.LCPI58_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1194,12 +1149,12 @@ define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f16_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI59_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI59_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI59_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1212,12 +1167,12 @@ define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI60_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI60_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI60_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1237,11 +1192,11 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI61_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI61_0)(a0) ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI61_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1256,12 +1211,12 @@ define float @vreduce_fmax_v2f32(<2 x float>* %x) { ; CHECK-LABEL: vreduce_fmax_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI62_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI62_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI62_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1276,12 +1231,12 @@ define float @vreduce_fmax_v4f32(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI63_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI63_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI63_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1294,12 +1249,12 
@@ define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI64_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI64_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI64_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1312,12 +1267,12 @@ define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI65_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI65_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI65_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1334,20 +1289,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: addi a0, a0, 256 -; CHECK-NEXT: vle32.v v0, (a0) -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vfmax.vv v16, v24, v16 +; CHECK-NEXT: vfmax.vv v8, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI66_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI66_0)(a0) -; CHECK-NEXT: vfmax.vv v16, v24, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI66_0) +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1362,12 +1317,12 @@ define double @vreduce_fmax_v2f64(<2 x double>* %x) { ; CHECK-LABEL: vreduce_fmax_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI67_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI67_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI67_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI67_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1382,12 +1337,12 @@ define double @vreduce_fmax_v4f64(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI68_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI68_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI68_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0) +; CHECK-NEXT: 
vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1400,12 +1355,12 @@ define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI69_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI69_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI69_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1418,12 +1373,12 @@ define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) { ; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI70_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI70_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI70_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1442,11 +1397,11 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI71_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI71_0)(a0) ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI71_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -10,9 +10,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -41,9 +41,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -72,9 +72,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, 
mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 @@ -101,9 +101,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv1f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -132,9 +132,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 @@ -161,9 +161,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -192,9 +192,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 @@ -221,9 +221,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -252,9 +252,9 @@ ; CHECK-LABEL: vreduce_fadd_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI16_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v12, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI16_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v12, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s ft0, v8 @@ -283,9 +283,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, 
mf4, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -298,9 +298,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI19_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -313,9 +313,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI20_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -330,9 +330,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI21_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI21_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -347,9 +347,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI22_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -362,12 +362,12 @@ define half @vreduce_fmin_nxv64f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI23_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -382,9 +382,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI24_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -397,9 +397,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI25_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI25_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, 
mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -412,9 +412,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI26_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI26_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI26_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -429,9 +429,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI27_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI27_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI27_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -445,9 +445,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI28_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI28_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -461,12 +461,12 @@ define float @vreduce_fmin_nxv32f32( %v) { ; CHECK-LABEL: vreduce_fmin_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI29_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI29_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI29_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI29_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -481,9 +481,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -495,9 +495,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -509,9 +509,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ 
-525,9 +525,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -542,9 +542,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI34_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v12, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v12, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -558,12 +558,12 @@ define double @vreduce_fmin_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI35_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -578,9 +578,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -593,9 +593,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI37_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI37_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -608,9 +608,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -625,9 +625,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI39_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI39_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; 
CHECK-NEXT: vfmv.f.s fa0, v8 @@ -642,9 +642,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -657,12 +657,12 @@ define half @vreduce_fmax_nxv64f16( %v) { ; CHECK-LABEL: vreduce_fmax_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI41_0) -; CHECK-NEXT: flh ft0, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vlse16.v v16, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -677,9 +677,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -692,9 +692,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI43_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI43_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -707,9 +707,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI44_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI44_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -724,9 +724,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI45_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI45_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -740,9 +740,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI46_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ 
-756,12 +756,12 @@ define float @vreduce_fmax_nxv32f32( %v) { ; CHECK-LABEL: vreduce_fmax_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI47_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI47_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI47_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vlse32.v v16, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -776,9 +776,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI48_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -790,9 +790,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI49_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI49_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -804,9 +804,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI50_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI50_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -820,9 +820,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI51_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI51_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -837,9 +837,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI52_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI52_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v12, ft0 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v12, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -853,12 +853,12 @@ define double @vreduce_fmax_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmax_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI53_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI53_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v16, ft0 +; CHECK-NEXT: lui a0, %hi(.LCPI53_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0) +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: 
vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8
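
; Illustrative sketch (not part of the patch; the function and value names
; are hypothetical): for scalable vector types, an IR splat of a loaded
; scalar is written as insertelement + shufflevector and reaches instruction
; selection as ISD::SPLAT_VECTOR. The new ISD::SPLAT_VECTOR case is intended
; to let such a splat be selected as a zero-strided vlse with VL = VLMAX,
; instead of a scalar FP load followed by vfmv.v.f.
define <vscale x 1 x double> @splat_of_load(double* %p) {
  %s = load double, double* %p
  %head = insertelement <vscale x 1 x double> undef, double %s, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> undef, <vscale x 1 x i32> zeroinitializer
  ret <vscale x 1 x double> %splat
}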