diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -187,6 +187,7 @@ bool doPeepholeSExtW(SDNode *Node); bool doPeepholeMaskedRVV(MachineSDNode *Node); bool doPeepholeMergeVVMFold(); + bool doPeepholeNoRegPassThru(); bool performVMergeToVMv(SDNode *N); bool performCombineVMergeAndVOps(SDNode *N); }; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -149,6 +149,14 @@ MadeChange |= doPeepholeMergeVVMFold(); + // After we're done with everything else, convert IMPLICIT_DEF + // passthru operands to NoRegister. This is required to work around + // an optimization deficiency in MachineCSE. This really should + // be merged back into each of the patterns (i.e. there's no good + // reason not to go directly to NoReg), but is being done this way + // to allow easy backporting. + MadeChange |= doPeepholeNoRegPassThru(); + if (MadeChange) CurDAG->RemoveDeadNodes(); } @@ -3593,6 +3601,44 @@ return MadeChange; } +/// If our passthru is an implicit_def, use noreg instead. This sidesteps +/// issues with MachineCSE not being able to CSE expressions with +/// IMPLICIT_DEF operands while preserving the semantic intent. See +/// pr64282 for context. Note that this transform is the last one +/// performed during ISel DAG-to-DAG. +bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { + bool MadeChange = false; + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + const unsigned Opc = N->getMachineOpcode(); + if (!RISCVVPseudosTable::getPseudoInfo(Opc) || + !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || + !isImplicitDef(N->getOperand(0))) + continue; + + SmallVector<SDValue> Ops; + Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); + for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { + SDValue Op = N->getOperand(I); + Ops.push_back(Op); + } + + MachineSDNode *Result = + CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + Result->setFlags(N->getFlags()); + CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands()); + ReplaceUses(N, Result); + MadeChange = true; + } + return MadeChange; +} + + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -160,9 +160,13 @@ // lanes are undefined. return true; - // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands - // are solely IMPLICIT_DEFS), the pass through lanes are undefined. + // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose + // operands are solely IMPLICIT_DEFS, then the pass through lanes are + // undefined.
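For context on why the NoRegister form CSEs where IMPLICIT_DEF does not, the following stand-alone sketch (plain C++; the Inst and Key types and the register numbers are illustrative assumptions, not LLVM's MachineCSE implementation) models a value-numbering table keyed on operands: two otherwise identical pseudos whose passthru operands are distinct implicit-def virtual registers never compare equal, while a shared NoRegister sentinel lets the second one fold away.

// Toy model (not LLVM code): why a shared NoRegister sentinel CSEs where
// per-use IMPLICIT_DEF virtual registers do not.
#include <iostream>
#include <map>
#include <tuple>

// A "machine instruction" reduced to an opcode plus two register operands:
// the tied passthru and one source.
struct Inst {
  unsigned Opcode;
  unsigned PassthruReg; // 0 plays the role of NoRegister.
  unsigned SrcReg;
};

// CSE key: instructions are considered identical only if all operands match.
using Key = std::tuple<unsigned, unsigned, unsigned>;

int main() {
  std::map<Key, int> Seen; // value-numbering table

  auto tryCSE = [&](const Inst &I, const char *Desc) {
    Key K{I.Opcode, I.PassthruReg, I.SrcReg};
    auto [It, Inserted] = Seen.emplace(K, (int)Seen.size());
    (void)It;
    std::cout << Desc << (Inserted ? ": new value\n" : ": CSE hit\n");
  };

  // With IMPLICIT_DEF, each use gets its own fresh virtual register
  // (here 1 and 2), so two otherwise identical pseudos never key equal.
  tryCSE({/*Opcode=*/10, /*PassthruReg=*/1, /*SrcReg=*/5}, "implicit_def #1");
  tryCSE({/*Opcode=*/10, /*PassthruReg=*/2, /*SrcReg=*/5}, "implicit_def #2");

  // With a shared NoRegister (0) sentinel, both key equal and the second
  // instruction folds away.
  tryCSE({/*Opcode=*/10, /*PassthruReg=*/0, /*SrcReg=*/5}, "noreg #1");
  tryCSE({/*Opcode=*/10, /*PassthruReg=*/0, /*SrcReg=*/5}, "noreg #2");
  return 0;
}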
const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) + return true; + if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { if (UseMI->isImplicitDef()) return true; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -52,16 +52,16 @@ /// /// Currently, the policy is represented via the following instrinsic families: /// * _MASK - Can represent all three policy states for both tail and mask. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. (If policy -/// operand is not present - there are a couple, thought we're rapidly -/// removing them - a non-undefined policy defaults to "tail agnostic", and -/// "mask undisturbed". Since this is the only variant with a mask, all -/// other variants are "mask undefined". +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. +/// (If policy operand is not present - there are a couple, though we're +/// rapidly removing them - a non-undefined policy defaults to "tail +/// agnostic", and "mask undisturbed".) Since this is the only variant with +/// a mask, all other variants are "mask undefined". /// * Unsuffixed w/ both passthrough and policy operand. Can represent all -/// three policy states. If passthrough is IMPLICIT_DEF, then represents -/// "undefined". Otherwise, policy operand and tablegen flags drive the -/// interpretation. +/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then +/// represents "undefined". Otherwise, policy operand and tablegen flags +/// drive the interpretation. /// * Unsuffixed w/o passthrough or policy operand -- Does not have a /// passthrough operand, and thus represents the "undefined" state. Note /// that terminology in code frequently refers to these as "TA" which is @@ -70,8 +70,8 @@ /// * _TU w/o policy operand -- Has a passthrough operand, and always /// represents the tail undisturbed state. /// * _TU w/policy operand - Can represent all three policy states. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. /// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp --- a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp +++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp @@ -9,7 +9,8 @@ // This file implements a function pass that initializes undef vector value to // temporary pseudo instruction and remove it in expandpseudo pass to prevent // register allocation resulting in a constraint violated result for vector -// instruction. +// instruction. It also rewrites the NoReg tied operand back to an +// IMPLICIT_DEF.
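Taken together with the ISel peephole above, the intended round trip is: instruction selection emits the tied passthru as NoRegister, MachineCSE can then merge identical pseudos, and this pass gives the operand a register again before TwoAddressInstruction by creating a fresh virtual register defined by an IMPLICIT_DEF. A minimal stand-alone sketch of that rewrite follows (the ToyInst type, createImplicitDef helper, and register numbering are assumptions for illustration, not the pass's real MachineIR API).

// Stand-alone sketch (assumed toy types, not LLVM's MachineIR API) of the
// NoReg -> IMPLICIT_DEF rewrite performed before TwoAddressInstruction.
#include <cstdio>
#include <vector>

constexpr unsigned NoRegister = 0;

struct ToyInst {
  const char *Name;
  std::vector<unsigned> Operands; // Operands[1] is the tied passthru.
};

// Allocate a new virtual register number and record an IMPLICIT_DEF for it.
unsigned createImplicitDef(std::vector<ToyInst> &Out, unsigned &NextVReg) {
  unsigned NewReg = NextVReg++;
  Out.push_back({"IMPLICIT_DEF", {NewReg}});
  return NewReg;
}

void rewriteNoRegPassthru(std::vector<ToyInst> &Block, unsigned &NextVReg) {
  std::vector<ToyInst> Out;
  for (ToyInst &I : Block) {
    if (I.Operands.size() > 1 && I.Operands[1] == NoRegister) {
      // The tied passthru was the NoRegister sentinel: give it a real,
      // freshly IMPLICIT_DEF'd virtual register instead.
      I.Operands[1] = createImplicitDef(Out, NextVReg);
    }
    Out.push_back(I);
  }
  Block = Out;
}

int main() {
  unsigned NextVReg = 100;
  std::vector<ToyInst> Block = {
      {"PseudoVADD", {/*def*/ 1, /*passthru*/ NoRegister, /*src*/ 2}}};
  rewriteNoRegPassthru(Block, NextVReg);
  for (const ToyInst &I : Block) {
    std::printf("%s", I.Name);
    for (unsigned R : I.Operands)
      std::printf(" %%%u", R);
    std::printf("\n");
  }
  return 0;
}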
// // RISC-V vector instruction has register overlapping constraint for certain // instructions, and will cause illegal instruction trap if violated, we use // @@ -30,6 +31,12 @@ // // See also: https://github.com/llvm/llvm-project/issues/50157 // +// Additionally, this pass rewrites tied operands of vector instructions +// from NoReg to IMPLICIT_DEF. (Note that this is a non-overlapping set of +// operands to the above.) We use NoReg to sidestep a MachineCSE +// optimization quality problem but need to convert back before +// TwoAddressInstruction. See pr64282 for context. +// //===----------------------------------------------------------------------===// #include "RISCV.h" @@ -244,6 +251,26 @@ bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { MachineInstr &MI = *I; + + // If we used NoReg to represent the passthru, switch this back to being + // an IMPLICIT_DEF before TwoAddressInstructions. + unsigned UseOpIdx; + if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) { + const TargetRegisterClass *RC = + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + Register NewDest = MRI->createVirtualRegister(RC); + // We don't have a way to update dead lanes, so keep track of the + // new register so that we avoid querying it later. + NewRegs.insert(NewDest); + BuildMI(MBB, I, I->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), NewDest); + UseMO.setReg(NewDest); + Changed = true; + } + } + if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI)) Changed |= handleSubReg(MF, MI, DLD); if (MI.isImplicitDef()) { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -273,6 +273,7 @@ void addPreRegAlloc() override; void addPostRegAlloc() override; void addOptimizedRegAlloc() override; + void addFastRegAlloc() override; }; } // namespace @@ -392,12 +393,17 @@ } void RISCVPassConfig::addOptimizedRegAlloc() { - if (getOptimizeRegAlloc()) - insertPass(&DetectDeadLanesID, &RISCVInitUndefID); + insertPass(&DetectDeadLanesID, &RISCVInitUndefID); TargetPassConfig::addOptimizedRegAlloc(); } +void RISCVPassConfig::addFastRegAlloc() { + addPass(createRISCVInitUndefPass()); + TargetPassConfig::addFastRegAlloc(); +} + + void RISCVPassConfig::addPostRegAlloc() { if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) addPass(createRISCVRedundantCopyEliminationPass()); diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -42,6 +42,7 @@ ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass +; CHECK-NEXT: RISC-V init undef pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll @@ -17,11 +17,11 @@ ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: addi a0, s1, 64 -; CHECK-NEXT: sd
a0, 0(sp) -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: addi a0, s1, 64 ; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: sd a0, 0(sp) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,15 +103,15 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: ret @@ -122,15 +122,15 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) @@ -157,15 +157,15 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv128: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1) diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1094,16 +1094,16 @@ ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: vand.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv 
v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a0 ; RV32-NEXT: vand.vx v12, v8, a2 @@ -1142,35 +1142,35 @@ ; ; RV64-LABEL: bitreverse_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsll.vi v9, v9, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v10, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v11, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v11, v11, a4 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsrl.vx v10, v8, a2 -; RV64-NEXT: vsrl.vx v11, v8, a4 -; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v11, v11, a4 ; RV64-NEXT: vor.vv v10, v11, v10 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vand.vx v11, v11, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v11 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 8 +; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vx v11, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1237,16 +1237,16 @@ ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a0 ; RV32-NEXT: vand.vx v16, v8, a2 @@ -1285,35 +1285,35 @@ ; ; RV64-LABEL: bitreverse_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v12, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v14, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v14, v14, a4 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v12, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, 
-256 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsrl.vx v12, v8, a2 -; RV64-NEXT: vsrl.vx v14, v8, a4 -; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v14, v14, a4 ; RV64-NEXT: vor.vv v12, v14, v12 -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vand.vx v14, v14, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v14 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 8 +; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vx v14, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1380,16 +1380,16 @@ ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v24, v24, v20 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a0 ; RV32-NEXT: vand.vx v24, v8, a2 @@ -1428,35 +1428,35 @@ ; ; RV64-LABEL: bitreverse_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v16, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v20, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v20, v20, a4 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsrl.vx v16, v8, a2 -; RV64-NEXT: vsrl.vx v20, v8, a4 -; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsrl.vi v20, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v20, v20, a4 ; RV64-NEXT: vor.vv v16, v20, v16 -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vand.vx v20, v20, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v20 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vx v20, v8, a0 +; RV64-NEXT: vand.vx 
v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1524,37 +1524,37 @@ ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a0 ; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v24 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a2 -; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a1 ; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vor.vv v0, v16, v0 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vor.vv v24, v16, v24 +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 40 ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vor.vv v8, v0, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v24 @@ -1585,35 +1585,35 @@ ; ; RV64-LABEL: bitreverse_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v24, v8, a1 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v24, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v0, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v0, v0, a4 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v24, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vsrl.vx v0, v8, a4 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsrl.vi v0, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vsrl.vi v0, v8, 24 -; RV64-NEXT: vand.vx v0, v0, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; 
RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2208,35 +2208,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t +; RV32-NEXT: vsll.vx v9, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4, v0.t -; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vand.vx v10, v8, a4, v0.t +; RV32-NEXT: vsll.vi v10, v10, 24, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12, v0.t -; RV32-NEXT: vor.vv v10, v11, v10, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsll.vx v10, v8, a1, v0.t -; RV32-NEXT: vand.vx v11, v8, a3, v0.t -; RV32-NEXT: vsll.vx v11, v11, a2, v0.t -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v11, v8, a4, v0.t -; RV32-NEXT: vsll.vi v11, v11, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v11, v8, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vsll.vi v12, v12, 8, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV32-NEXT: vand.vx v12, v12, a2, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma @@ -2358,35 +2358,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; 
RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vx v11, v8, a4 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma @@ -2512,35 +2512,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV32-NEXT: vsll.vx v10, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: vand.vx v12, v8, a4, v0.t +; RV32-NEXT: vsll.vi v12, v12, 24, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16, v0.t -; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsll.vx v12, v8, a1, v0.t -; RV32-NEXT: vand.vx v14, v8, a3, v0.t -; RV32-NEXT: vsll.vx v14, v14, a2, v0.t -; RV32-NEXT: vor.vv v12, v12, v14, v0.t -; RV32-NEXT: vand.vx v14, v8, a4, v0.t -; RV32-NEXT: vsll.vi v14, v14, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v14, v8, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: vsrl.vi 
v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma @@ -2662,35 +2662,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma @@ -2816,35 +2816,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV32-NEXT: vsll.vx v12, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vor.vv v16, v12, v16, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v20, v16, a4, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vand.vx v12, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v12, 24, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vor.vv v12, v20, v12, v0.t -; RV32-NEXT: vsll.vx v20, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, 
v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vv v24, v8, v12, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v20, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v16, v16, v20, v0.t +; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma @@ -2966,35 +2966,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma @@ -3125,73 +3125,73 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; 
RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a5, sp, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 40 
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3338,42 +3338,42 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3507,73 +3507,73 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 ; 
RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a5, sp, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3720,42 +3720,42 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 
-; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -348,16 +348,16 @@ ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: vand.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a0 ; RV32-NEXT: vand.vx v12, v8, a2 @@ -375,35 +375,35 @@ ; ; RV64-LABEL: bswap_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsll.vi v9, v9, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v10, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v11, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v11, v11, a4 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: 
vsrl.vx v10, v8, a2 -; RV64-NEXT: vsrl.vx v11, v8, a4 -; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v11, v11, a4 ; RV64-NEXT: vor.vv v10, v11, v10 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vand.vx v11, v11, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v11 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 8 +; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vx v11, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv1i64: @@ -431,16 +431,16 @@ ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a0 ; RV32-NEXT: vand.vx v16, v8, a2 @@ -458,35 +458,35 @@ ; ; RV64-LABEL: bswap_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v12, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v14, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v14, v14, a4 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v12, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsrl.vx v12, v8, a2 -; RV64-NEXT: vsrl.vx v14, v8, a4 -; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v14, v14, a4 ; RV64-NEXT: vor.vv v12, v14, v12 -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vand.vx v14, v14, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v14 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 8 +; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vx v14, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: 
vor.vv v8, v10, v8 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv2i64: @@ -514,16 +514,16 @@ ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v24, v24, v20 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a0 ; RV32-NEXT: vand.vx v24, v8, a2 @@ -541,35 +541,35 @@ ; ; RV64-LABEL: bswap_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v16, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v20, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v20, v20, a4 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsrl.vx v16, v8, a2 -; RV64-NEXT: vsrl.vx v20, v8, a4 -; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsrl.vi v20, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v20, v20, a4 ; RV64-NEXT: vor.vv v16, v20, v16 -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vand.vx v20, v20, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v20 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vx v20, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv4i64: @@ -596,37 +596,37 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v24, v8, a0 +; RV32-NEXT: vsrl.vx v16, v8, a0 ; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; 
RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a2 ; RV32-NEXT: vsll.vx v0, v0, a1 -; RV32-NEXT: vsll.vx v24, v8, a0 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -638,35 +638,35 @@ ; ; RV64-LABEL: bswap_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v24, v8, a1 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v24, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v0, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v0, v0, a4 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v24, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vsrl.vx v0, v8, a4 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsrl.vi v0, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vsrl.vi v0, v8, 24 -; RV64-NEXT: vand.vx v0, v0, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -700,35 +700,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; 
RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t +; RV32-NEXT: vsll.vx v9, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4, v0.t -; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vand.vx v10, v8, a4, v0.t +; RV32-NEXT: vsll.vi v10, v10, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12, v0.t -; RV32-NEXT: vor.vv v10, v11, v10, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsll.vx v10, v8, a1, v0.t -; RV32-NEXT: vand.vx v11, v8, a3, v0.t -; RV32-NEXT: vsll.vx v11, v11, a2, v0.t -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v11, v8, a4, v0.t -; RV32-NEXT: vsll.vi v11, v11, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v11, v8, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vsll.vi v12, v12, 8, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV32-NEXT: vand.vx v12, v12, a2, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -784,35 +784,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vx v11, v8, a4 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: 
vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -872,35 +872,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV32-NEXT: vsll.vx v10, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: vand.vx v12, v8, a4, v0.t +; RV32-NEXT: vsll.vi v12, v12, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16, v0.t -; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsll.vx v12, v8, a1, v0.t -; RV32-NEXT: vand.vx v14, v8, a3, v0.t -; RV32-NEXT: vsll.vx v14, v14, a2, v0.t -; RV32-NEXT: vor.vv v12, v12, v14, v0.t -; RV32-NEXT: vand.vx v14, v8, a4, v0.t -; RV32-NEXT: vsll.vi v14, v14, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v14, v8, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -956,35 +956,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; 
RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1044,35 +1044,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV32-NEXT: vsll.vx v12, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vor.vv v16, v12, v16, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v20, v16, a4, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vand.vx v12, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v12, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vor.vv v12, v20, v12, v0.t -; RV32-NEXT: vsll.vx v20, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vv v24, v8, v12, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v20, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v16, v16, v20, v0.t +; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1128,35 +1128,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 
-; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1221,73 +1221,73 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, 
v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 @@ -1368,41 +1368,41 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a0) # 
Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1470,73 +1470,73 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v24, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv 
v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 @@ -1617,41 +1617,41 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: 
vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1841,35 +1841,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t +; RV32-NEXT: vsll.vx v9, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4, v0.t -; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vand.vx v10, v8, a4, v0.t +; RV32-NEXT: vsll.vi v10, v10, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12, v0.t -; RV32-NEXT: vor.vv v10, v11, v10, v0.t -; RV32-NEXT: vor.vv v9, v10, v9, v0.t -; RV32-NEXT: vsll.vx v10, v8, a1, v0.t -; RV32-NEXT: vand.vx v11, v8, a3, v0.t -; RV32-NEXT: vsll.vx v11, v11, a2, v0.t -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v11, v8, a4, v0.t -; RV32-NEXT: vsll.vi v11, v11, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v11, v8, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vsll.vi v12, v12, 8, v0.t +; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v10, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV32-NEXT: vand.vx v12, v12, a2, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -265,10 +265,10 @@ ; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v8, v24 -; CHECK-NEXT: vadd.vv v8, v0, v16 -; CHECK-NEXT: vadd.vx v8, v8, a4 -; CHECK-NEXT: vadd.vx v16, v24, a4 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v24, v0, v16 +; CHECK-NEXT: vadd.vx v16, v8, a4 +; CHECK-NEXT: vadd.vx v8, v24, a4 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll b/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll --- a/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll @@ -1,18 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -O3 -mtriple=riscv64 -mattr=+v | FileCheck %s -; TODO: 
The case below demonstrates a regression in cross block CSE of vector -; instructions with undefined passthru operands. The second vadd.vv should be -; removed. +; The case below demonstrates cross block CSE of vector instructions with +; undefined passthru operands. define void @foo( %x, %y, ptr %p1, ptr %p2, i1 zeroext %cond) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vadd.vv v10, v8, v9 -; CHECK-NEXT: vs1r.v v10, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: bnez a2, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %falsebb -; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vs1r.v v8, (a1) ; CHECK-NEXT: .LBB0_2: # %mergebb ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2752,12 +2752,13 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 @@ -2765,7 +2766,7 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a2 +; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: sw a2, 44(sp) @@ -2782,186 +2783,166 @@ ; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB46_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: .LBB46_2: +; RV32-NEXT: sub a2, a0, a1 +; RV32-NEXT: sltu a3, a0, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; 
RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v24, v16, v24, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; 
RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v16, v24, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: sub a1, a0, a1 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: bltu a0, a1, .LBB46_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB46_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 
3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -3083,15 +3064,9 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb -; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -3105,73 +3080,67 @@ ; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a2, a1, 257 -; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: .LBB47_2: +; RV32-NEXT: sub a2, a0, a1 +; RV32-NEXT: sltu a3, a0, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vsub.vv v16, v16, v24 ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v24, v16, v24 ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; 
RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v16, a2 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: sub a1, a0, a1 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsrl.vx v16, v16, a2 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: bltu a0, a1, .LBB47_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB47_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 @@ -3180,11 +3149,11 @@ ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v16, v8, v16 -; RV32-NEXT: vand.vv v8, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb @@ -3193,18 +3162,17 @@ ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v8, v8, a2 +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1536,19 +1536,19 @@ ; RV32F-LABEL: cttz_nxv1i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v9, v8, 0 -; RV32F-NEXT: vand.vv v9, v8, v9 +; RV32F-NEXT: vand.vv 
v8, v8, v9 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v10, v9 -; RV32F-NEXT: vsrl.vi v9, v10, 23 +; RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; RV32F-NEXT: vsrl.vi v8, v9, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV32F-NEXT: vzext.vf2 v10, v9 +; RV32F-NEXT: vzext.vf2 v9, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v9, v10, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v9, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1574,17 +1574,17 @@ ; RV32D-LABEL: cttz_nxv1i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v9, v8, 0 -; RV32D-NEXT: vand.vv v9, v8, v9 +; RV32D-NEXT: vand.vv v8, v8, v9 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v9, v9 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v9, v9, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v9, v9, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -1706,19 +1706,19 @@ ; RV32F-LABEL: cttz_nxv2i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v10, v8, 0 -; RV32F-NEXT: vand.vv v10, v8, v10 +; RV32F-NEXT: vand.vv v8, v8, v10 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v12, v10 -; RV32F-NEXT: vsrl.vi v10, v12, 23 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: vsrl.vi v8, v10, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32F-NEXT: vzext.vf2 v12, v10 +; RV32F-NEXT: vzext.vf2 v10, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v10, v12, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v10, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1744,17 +1744,17 @@ ; RV32D-LABEL: cttz_nxv2i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v10, v8, 0 -; RV32D-NEXT: vand.vv v10, v8, v10 +; RV32D-NEXT: vand.vv v8, v8, v10 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v10, v10 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v10, v10, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v10, v10, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -1876,19 +1876,19 @@ ; RV32F-LABEL: cttz_nxv4i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v12, v8, 0 -; RV32F-NEXT: vand.vv v12, v8, v12 +; RV32F-NEXT: vand.vv v8, v8, v12 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v16, v12 -; RV32F-NEXT: vsrl.vi v12, v16, 23 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: vsrl.vi v8, v12, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32F-NEXT: vzext.vf2 v16, v12 +; RV32F-NEXT: 
vzext.vf2 v12, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v12, v16, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v12, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1914,17 +1914,17 @@ ; RV32D-LABEL: cttz_nxv4i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v12, v8, 0 -; RV32D-NEXT: vand.vv v12, v8, v12 +; RV32D-NEXT: vand.vv v8, v8, v12 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v12, v12 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v12, v12, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v12, v12, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -2046,19 +2046,19 @@ ; RV32F-LABEL: cttz_nxv8i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v16, v8, 0 -; RV32F-NEXT: vand.vv v16, v8, v16 +; RV32F-NEXT: vand.vv v8, v8, v16 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v24, v16 -; RV32F-NEXT: vsrl.vi v16, v24, 23 +; RV32F-NEXT: vfncvt.f.xu.w v16, v8 +; RV32F-NEXT: vsrl.vi v8, v16, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32F-NEXT: vzext.vf2 v24, v16 +; RV32F-NEXT: vzext.vf2 v16, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v16, v24, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v16, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v16, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -2084,17 +2084,17 @@ ; RV32D-LABEL: cttz_nxv8i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v16, v8, 0 -; RV32D-NEXT: vand.vv v16, v8, v16 +; RV32D-NEXT: vand.vv v8, v8, v16 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v16, v16 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v16, v16, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v16, v16, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v16, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -154,9 +154,9 @@ ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 ; RV32-NEXT: vs8r.v v24, (a3) +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 -; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -194,9 +194,9 @@ ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 ; RV64-NEXT: vs8r.v v24, (a3) +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 -; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -599,10 +599,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v9, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v9 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -640,10 +640,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v10, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v10 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -681,10 +681,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v12, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v12 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -722,10 +722,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v16, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -957,9 +957,9 @@ ; CHECK-LABEL: extractelt_nxv16i64_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v8, 1 -; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 0 ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -146,9 +146,9 @@ ; RV32ELEN32-LABEL: bitcast_v8i8_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: bitcast_v8i8_i64: @@ -184,9 +184,9 @@ ; RV32ELEN32-LABEL: bitcast_v4i16_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: bitcast_v4i16_i64: @@ -222,9 +222,9 @@ ; RV32ELEN32-LABEL: bitcast_v2i32_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: 
bitcast_v2i32_i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1396,9 +1396,10 @@ ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1530,9 +1531,10 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1663,31 +1665,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v18, v18, a4, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a4, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t @@ -1790,39 +1792,39 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; 
RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -1934,35 +1936,35 @@ ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v28, v28, a4, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t ; RV32-NEXT: vand.vx v28, v8, a4, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2063,40 +2065,40 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 
56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v20, v24, v20 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2202,36 +2204,54 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; 
RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2240,103 +2260,189 @@ ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, 
v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, 
sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2441,37 +2547,37 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, 
a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a6 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -2582,36 +2688,54 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2620,103 +2744,189 @@ ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; 
RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 61681 ; 
RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2821,37 +3031,37 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a6 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -95,10 +95,10 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -134,10 +134,10 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 24 -; 
RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a1, 61681 @@ -183,38 +183,38 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v9 -; RV32-NEXT: vsrl.vi v11, v8, 24 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v11, v11, a1 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: lui a4, 16 -; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: vsll.vi v9, v9, 8 -; RV32-NEXT: vand.vx v11, v8, a1 -; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vor.vv v11, v11, v12 ; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -518,10 +518,10 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -557,10 +557,10 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; 
LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: lui a1, 61681 @@ -598,10 +598,10 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV32-NEXT: lui a3, 61681 @@ -628,10 +628,10 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 @@ -664,10 +664,10 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: lui a3, 61681 @@ -694,10 +694,10 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 @@ -746,31 +746,31 @@ ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 ; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: lui a4, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; 
LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -870,40 +870,40 @@ ; LMULMAX1-RV32-LABEL: bitreverse_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v9, v10, a2 +; LMULMAX1-RV32-NEXT: li a3, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v10, a3 +; LMULMAX1-RV32-NEXT: lui a4, 16 +; LMULMAX1-RV32-NEXT: addi a4, a4, -256 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v9 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a2, v0 +; LMULMAX1-RV32-NEXT: lui a5, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a5, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v10, 24 -; LMULMAX1-RV32-NEXT: lui a2, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: li a3, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v10, a3 -; LMULMAX1-RV32-NEXT: li a4, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v10, a4 -; LMULMAX1-RV32-NEXT: lui a5, 16 -; LMULMAX1-RV32-NEXT: addi a5, a5, -256 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v10, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v10, 24 +; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v12, v10, v9 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v10, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v10, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v13, v10, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v12 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 24 +; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v13 +; LMULMAX1-RV32-NEXT: vor.vv v10, 
v12, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 4 ; LMULMAX1-RV32-NEXT: lui a6, 61681 @@ -935,26 +935,26 @@ ; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 ; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a2 ; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX1-RV32-NEXT: vand.vx v16, v16, a5 -; LMULMAX1-RV32-NEXT: vor.vv v15, v16, v15 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a2 +; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v16, v16, a3 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 ; LMULMAX1-RV32-NEXT: vand.vv v9, v8, v9 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v15, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v15, v15, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v15, v9 -; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -436,9 +436,10 @@ ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -513,9 +514,10 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -589,31 +591,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm 
v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v18, v18, a0, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a0, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: ret @@ -659,39 +661,39 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v4i64_unmasked: @@ -746,35 +748,35 @@ ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, 
v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v28, v28, a0, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vand.vx v28, v8, a4, v0.t +; RV32-NEXT: vand.vx v28, v8, a0, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64: @@ -818,40 +820,40 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v20, v24, v20 
+; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64_unmasked: @@ -900,36 +902,54 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -938,75 +958,71 @@ ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; 
RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1084,37 +1100,37 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, 
v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -1170,36 +1186,54 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 
; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -1208,75 +1242,71 @@ ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size 
Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1354,37 +1384,37 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -43,10 +43,10 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret @@ -61,10 +61,10 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 24 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, 
(a0) ; RV64-NEXT: ret @@ -89,38 +89,38 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v9 -; RV32-NEXT: vsrl.vi v11, v8, 24 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v11, v11, a1 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: lui a4, 16 -; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: vsll.vi v9, v9, 8 -; RV32-NEXT: vand.vx v11, v8, a1 -; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vor.vv v11, v11, v12 ; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -253,10 +253,10 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -271,10 +271,10 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -291,19 +291,19 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; 
LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) @@ -321,19 +321,19 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) @@ -367,31 +367,31 @@ ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 ; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: lui a4, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; 
LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -434,64 +434,64 @@ ; LMULMAX1-RV32-LABEL: bswap_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV32-NEXT: li a3, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3 +; LMULMAX1-RV32-NEXT: lui a4, 16 +; LMULMAX1-RV32-NEXT: addi a4, a4, -256 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 +; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 +; LMULMAX1-RV32-NEXT: lui a5, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 24 -; LMULMAX1-RV32-NEXT: lui a2, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: li a3, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v9, a3 -; LMULMAX1-RV32-NEXT: li a4, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v9, a4 -; LMULMAX1-RV32-NEXT: lui a5, 16 -; LMULMAX1-RV32-NEXT: addi a5, a5, -256 -; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a4 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 24 +; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 8 -; LMULMAX1-RV32-NEXT: vand.vx v12, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 24 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 ; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4 +; 
LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v13, v8, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 8 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v13 ; LMULMAX1-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v9, a3 +; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a4 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 24 +; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vor.vv v9, v12, v9 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bswap_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -412,9 +412,9 @@ ; LMULMAX4-NEXT: addi s0, sp, 256 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 ; LMULMAX4-NEXT: andi sp, sp, -128 +; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: mv a0, sp ; LMULMAX4-NEXT: li a1, 1 @@ -516,9 +516,9 @@ ; LMULMAX4-NEXT: sd a0, 136(sp) ; LMULMAX4-NEXT: li a0, 13 ; LMULMAX4-NEXT: sd a0, 0(sp) +; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: addi a0, sp, 8 ; LMULMAX4-NEXT: li a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -567,15 +567,16 @@ ; ; LMULMAX2-RV32F-LABEL: ctlz_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: 
vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -587,12 +588,12 @@ ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1263,15 +1264,16 @@ ; ; LMULMAX2-RV32F-LABEL: ctlz_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -1283,16 +1285,16 @@ ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v12, a1 +; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1 ; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64F-NEXT: ret ; @@ -1887,15 +1889,16 @@ ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v9, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -1904,12 +1907,12 @@ ; LMULMAX2-RV64F: # %bb.0: ; 
LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV64F-NEXT: ret @@ -2553,15 +2556,16 @@ ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -2570,14 +2574,14 @@ ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 -; LMULMAX2-RV64F-NEXT: vse64.v v12, (a0) +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 +; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV64F-NEXT: ret ; ; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1871,110 +1871,104 @@ ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li 
a4, 24 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a2, a2, a4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, 
v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v8, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -1999,7 +1993,7 @@ ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2015,7 +2009,8 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2028,7 +2023,8 @@ ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2043,8 +2039,7 @@ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb 
-; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2158,65 +2153,72 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB35_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB35_2: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vmv.v.x v0, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: li a2, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, 
ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v8 ; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vsrl.vx v8, v24, a1 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 @@ -2227,15 +2229,20 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vsub.vv v24, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v8, v24 ; RV32-NEXT: vand.vv v8, v24, v0 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v24, v24, v0 @@ -2258,7 +2265,8 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -2133,32 +2133,47 @@ ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb -; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a1 +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: li a1, 1 +; 
RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 56 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2176,27 +2191,27 @@ ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2207,6 +2222,12 @@ ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -2227,12 +2248,6 @@ ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2246,33 +2261,36 @@ ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded 
Spill -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a3, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB34_2: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -2281,23 +2299,26 @@ ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2309,50 +2330,31 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -2371,7 +2373,8 @@ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2382,9 +2385,12 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -2502,23 +2508,24 @@ ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v8, v16 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: vsub.vx v8, v8, a3 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -2531,7 +2538,7 @@ ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -2542,7 +2549,7 @@ ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: 
addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2553,23 +2560,23 @@ ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -2577,7 +2584,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a3 +; RV32-NEXT: vsub.vx v0, v0, a2 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb @@ -4778,32 +4785,47 @@ ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb -; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a1 +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB70_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB70_2: +; RV32-NEXT: li a1, 1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 56 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 
56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4821,27 +4843,27 @@ ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4852,6 +4874,12 @@ ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -4872,12 +4900,6 @@ ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -4891,33 +4913,36 @@ ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a4, 
sp, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a3, .LBB70_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB70_2: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -4926,23 +4951,26 @@ ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -4954,50 +4982,31 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -5016,7 +5025,8 @@ ; RV32-NEXT: 
vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5027,9 +5037,12 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -5147,23 +5160,24 @@ ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v8, v16 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: vsub.vx v8, v8, a3 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -5176,7 +5190,7 @@ ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -5187,7 +5201,7 @@ ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -5198,23 +5212,23 @@ ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: 
vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -5222,7 +5236,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a3 +; RV32-NEXT: vsub.vx v0, v0, a2 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -548,20 +548,20 @@ ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v10, v9, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32F-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v11, v10 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v11, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v11, v10 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v10, v11, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -592,18 +592,18 @@ ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v10, v9, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32D-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -633,18 +633,18 @@ ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v10, v9, v8 -; LMULMAX8-RV32-NEXT: vand.vv v10, 
v8, v10 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX8-RV32-NEXT: vsub.vv v9, v9, v8 +; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v10, v10 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v10, v10, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; @@ -1232,20 +1232,20 @@ ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v12, v10, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v14, v12 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v12, v14, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v14, v12 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v12, v14, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -1276,18 +1276,18 @@ ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v12, v10, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v12, v12, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v12, v12, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -1317,18 +1317,18 @@ ; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v12, v10, v8 -; LMULMAX8-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v8 +; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 ; 
LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v12, v12, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v12, v12, a1 -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -48,11 +48,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v2i64: @@ -154,10 +154,10 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v4i64: @@ -803,14 +803,14 @@ ; ; RV64M-LABEL: extractelt_udiv_v4i32: ; RV64M: # %bb.0: +; RV64M-NEXT: lui a0, 322639 +; RV64M-NEXT: addiw a0, a0, -945 +; RV64M-NEXT: slli a0, a0, 32 ; RV64M-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64M-NEXT: vslidedown.vi v8, v8, 2 -; RV64M-NEXT: vmv.x.s a0, v8 -; RV64M-NEXT: slli a0, a0, 32 -; RV64M-NEXT: lui a1, 322639 -; RV64M-NEXT: addiw a1, a1, -945 +; RV64M-NEXT: vmv.x.s a1, v8 ; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: mulhu a0, a0, a1 +; RV64M-NEXT: mulhu a0, a1, a0 ; RV64M-NEXT: srli a0, a0, 34 ; RV64M-NEXT: ret %bo = udiv <4 x i32> %x, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -235,12 +235,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v8, v11, v8 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b) @@ -253,12 +253,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v9, v8, v8 -; CHECK-NEXT: vmerge.vvm v11, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v9, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v8, v11, v8 +; CHECK-NEXT: vmerge.vvm 
v8, v10, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll @@ -9,8 +9,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 [[DEF]], [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */ + ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 $noreg, [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */, implicit $frm ; CHECK-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %1 = fadd fast <2 x double> %x, %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -235,12 +235,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v8, v11, v8 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b) @@ -253,12 +253,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v9, v8, v8 -; CHECK-NEXT: vmerge.vvm v11, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v9, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v8, v11, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -160,8 +160,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x half> poison, half 0.0, i32 0 @@ -182,8 +182,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; 
LMULMAX1-NEXT: ret %a = insertelement <8 x float> poison, float 0.0, i32 0 @@ -204,8 +204,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse64.v v8, (a1) +; LMULMAX1-NEXT: vse64.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse64.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <4 x double> poison, double 0.0, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -77,8 +77,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -95,8 +95,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -114,8 +114,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -133,8 +133,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -151,13 +151,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -172,13 +171,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -663,12 +661,11 @@ define void @fp2si_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2si_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; 
CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret @@ -682,12 +679,11 @@ define void @fp2ui_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2ui_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -94,25 +94,25 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfwcvt.f.f.v v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -316,23 +316,23 @@ define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv 
a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -316,23 +316,23 @@ define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -94,27 +94,27 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: ret %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -802,24 +802,24 @@ ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v16, v8, 1, v0.t -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vand.vx v8, v24, a0, v0.t +; RV32-NEXT: vsrl.vv v16, v16, v8, v0.t +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vxor.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 63 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vv v8, v16, v8, v0.t -; RV32-NEXT: vand.vx v16, v24, a0, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 1, v0.t +; RV32-NEXT: vsll.vv v8, v24, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 @@ -835,20 +835,20 @@ ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v16, v8, 1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: li a0, 63 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v24, a0, v0.t +; RV64-NEXT: vsrl.vv v16, v16, v8, v0.t ; RV64-NEXT: vnot.v v8, v24, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v24, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV64-NEXT: vsll.vi v24, v24, 1, v0.t +; RV64-NEXT: vsll.vv v8, v24, v8, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -869,25 +869,26 @@ ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; 
RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: vand.vx v8, v24, a0, v0.t +; RV32-NEXT: vsll.vv v8, v16, v8, v0.t +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v16, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV32-NEXT: vand.vx v16, v24, a0, v0.t +; RV32-NEXT: vxor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vx v16, v16, a0, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsll.vv v16, v24, v16, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 1, v0.t +; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -902,21 +903,22 @@ ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v16, 1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vmv8r.v v16, v8 ; RV64-NEXT: li a0, 63 -; RV64-NEXT: vnot.v v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v24, a0, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v24, a0, v0.t +; RV64-NEXT: vsll.vv v8, v16, v8, v0.t +; RV64-NEXT: vnot.v v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsll.vv v16, v24, v16, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 1, v0.t +; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -22,9 +22,9 @@ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -55,9 +55,9 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli 
zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -90,9 +90,9 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -106,15 +106,14 @@ ; CHECK-LABEL: insertelt_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vi v12, v8, 1 +; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i64 1 @@ -125,16 +124,15 @@ ; CHECK-LABEL: insertelt_idx_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vx v12, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -422,10 +422,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -470,14 +470,14 @@ ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmv.v.i v10, 4 ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v11, v8, 2 +; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.v.x v0, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -488,38 +488,38 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x 
i8> %v, <8 x i8> %w) { ; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8256 -; RV32-NEXT: addi a0, a0, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vrgather.vv v10, v8, v11 -; RV32-NEXT: vmv.v.i v8, 6 +; RV32-NEXT: vmv.v.i v10, 6 ; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vi v11, v8, 5 +; RV32-NEXT: vslideup.vi v11, v10, 5 +; RV32-NEXT: lui a0, 8256 +; RV32-NEXT: addi a0, a0, 2 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: li a0, 98 ; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vrgather.vv v10, v8, v12 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8256 -; RV64-NEXT: addiw a0, a0, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v11, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vrgather.vv v10, v8, v11 -; RV64-NEXT: vmv.v.i v8, 6 +; RV64-NEXT: vmv.v.i v10, 6 ; RV64-NEXT: vmv.v.i v11, 0 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vi v11, v8, 5 +; RV64-NEXT: vslideup.vi v11, v10, 5 +; RV64-NEXT: lui a0, 8256 +; RV64-NEXT: addiw a0, a0, 2 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: li a0, 98 ; RV64-NEXT: vmv.v.x v0, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -339,8 +339,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: vse8.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 0, i32 0 @@ -368,8 +368,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 0, i32 0 @@ -397,8 +397,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 0, i32 0 @@ -426,8 +426,8 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -435,8 +435,8 @@ 
; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a0, a0, 16 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 0, i32 0 @@ -632,8 +632,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: vse8.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 -1, i32 0 @@ -661,8 +661,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 -1, i32 0 @@ -690,8 +690,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 -1, i32 0 @@ -719,8 +719,8 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -728,8 +728,8 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a0, a0, 16 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 -1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1105,6 +1105,14 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: lui a1, 3 +; RV32-NEXT: addi a1, a1, -2044 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a1 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, -128 +; RV32-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV32-NEXT: lui a1, 1 ; RV32-NEXT: addi a2, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -1112,19 +1120,11 @@ ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: lui a2, %hi(.LCPI65_0) ; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v9, (a2) -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vmerge.vim v11, v10, 1, v0 -; RV32-NEXT: vsrl.vv v11, v8, v11 -; RV32-NEXT: vmulhu.vv v9, v11, v9 +; RV32-NEXT: vle8.v v11, (a2) +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsrl.vv v9, v8, v9 +; RV32-NEXT: vmulhu.vv v9, v9, v11 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, -2044 -; RV32-NEXT: 
vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: li a2, -128 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV32-NEXT: vmulhu.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: li a2, 513 @@ -1152,6 +1152,14 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: lui a1, 3 +; RV64-NEXT: addiw a1, a1, -2044 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: li a1, -128 +; RV64-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: addiw a2, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -1159,19 +1167,11 @@ ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: lui a2, %hi(.LCPI65_0) ; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v9, (a2) -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vmerge.vim v11, v10, 1, v0 -; RV64-NEXT: vsrl.vv v11, v8, v11 -; RV64-NEXT: vmulhu.vv v9, v11, v9 +; RV64-NEXT: vle8.v v11, (a2) +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: vsrl.vv v9, v8, v9 +; RV64-NEXT: vmulhu.vv v9, v9, v11 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, -2044 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a2 -; RV64-NEXT: li a2, -128 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV64-NEXT: vmulhu.vv v8, v8, v10 ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: li a2, 513 @@ -1205,32 +1205,32 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vslideup.vi v11, v9, 6 +; CHECK-NEXT: vslideup.vi v9, v11, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vsrl.vv v11, v8, v11 -; CHECK-NEXT: vmulhu.vv v11, v11, v12 -; CHECK-NEXT: vsub.vv v8, v8, v11 -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vv v9, v8, v9 +; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v11 +; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: li a1, 33 ; CHECK-NEXT: vmv.v.x v0, a1 -; CHECK-NEXT: vmv.v.i v10, 3 -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v10, v9, 6 +; CHECK-NEXT: vslideup.vi v9, v11, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -1272,18 +1272,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetivli zero, 
3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vv v8, v8, v11 +; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vmv.v.i v9, 2 ; CHECK-NEXT: li a1, 1 @@ -1440,24 +1440,24 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vmv.v.i v9, -7 -; CHECK-NEXT: vmerge.vim v9, v9, 7, v0 -; CHECK-NEXT: vdiv.vv v9, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 7 -; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: li a1, -14 -; CHECK-NEXT: vmadd.vx v11, a1, v10 +; CHECK-NEXT: vmadd.vx v10, a1, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vslidedown.vi v9, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdiv.vv v8, v8, v11 +; CHECK-NEXT: vdiv.vv v9, v9, v10 +; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -7 +; CHECK-NEXT: vmerge.vim v10, v10, 7, v0 +; CHECK-NEXT: vdiv.vv v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 4 +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, @@ -1525,16 +1525,16 @@ ; RV32-NEXT: vrsub.vi v10, v10, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmadd.vv v10, v8, v9 -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsrl.vx v8, v10, a1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 1 -; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: vmv.v.i v8, 1 +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV32-NEXT: vslideup.vi v11, v9, 2 +; RV32-NEXT: vslideup.vi v9, v8, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsra.vv v9, v10, v11 -; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: vsra.vv v8, v10, v9 +; RV32-NEXT: li a1, 63 +; RV32-NEXT: vsrl.vx v9, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -4955,6 +4955,13 @@ ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32-NEXT: lui a2, 163907 +; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV32-NEXT: li a2, -128 +; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66049 ; LMULMAX2-RV32-NEXT: addi a2, a2, 32 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -4962,20 +4969,13 @@ ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v12, (a2) -; 
LMULMAX2-RV32-NEXT: vmerge.vim v14, v10, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v14, v12 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 @@ -5005,6 +5005,13 @@ ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV64-NEXT: li a2, -128 +; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66049 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -5012,20 +5019,13 @@ ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v12, (a2) -; LMULMAX2-RV64-NEXT: vmerge.vim v14, v10, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 @@ -5074,6 +5074,13 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) +; LMULMAX2-RV32-NEXT: li a1, 257 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1048568 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -5081,19 +5088,12 @@ ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a1, 
%hi(.LCPI182_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV32-NEXT: vle16.v v12, (a1) -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v16, v14, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v16, v10, v16 -; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v12, v16 ; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a1, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: lui a1, 2 @@ -5113,6 +5113,13 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) +; LMULMAX2-RV64-NEXT: li a1, 257 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV64-NEXT: lui a1, 1048568 +; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -5120,19 +5127,12 @@ ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV64-NEXT: vle16.v v12, (a1) -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v16, v14, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v16, v10, v16 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v16 ; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v14, a1, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV64-NEXT: vadd.vv v10, v10, v12 ; LMULMAX2-RV64-NEXT: lui a1, 2 @@ -5173,18 +5173,18 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 68 ; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-NEXT: vmv.v.x v0, a1 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) +; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) +; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vmv.v.i v12, 0 ; LMULMAX2-NEXT: lui a1, 524288 ; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 @@ -5200,33 +5200,33 @@ ; 
LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: vle32.v v10, (a0) -; LMULMAX1-RV32-NEXT: vmulhu.vv v11, v8, v9 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 524288 -; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2 -; LMULMAX1-RV32-NEXT: vmv.v.i v13, 0 +; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 2 +; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2 +; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) +; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 2 +; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmv.v.i v12, 2 ; LMULMAX1-RV32-NEXT: li a2, 1 -; LMULMAX1-RV32-NEXT: vslide1down.vx v11, v11, a2 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsub.vv v10, v10, v9 -; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vslide1down.vx v12, v12, a2 +; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v8i32: @@ -5283,24 +5283,24 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: li a1, -1 ; LMULMAX2-RV64-NEXT: slli a1, a1, 63 -; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; LMULMAX2-RV64-NEXT: vslideup.vi v14, v12, 2 +; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_1) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_1) -; LMULMAX2-RV64-NEXT: vle64.v v12, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v14 +; LMULMAX2-RV64-NEXT: vle64.v v14, (a1) +; LMULMAX2-RV64-NEXT: vsub.vv v8, 
v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v14 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5330,46 +5330,46 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI184_1)(a1) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV64-NEXT: li a2, -1 +; LMULMAX1-RV64-NEXT: slli a2, a2, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_0) +; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_0) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v10, v10, v9 -; LMULMAX1-RV64-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV64-NEXT: li a1, -1 -; LMULMAX1-RV64-NEXT: slli a1, a1, 63 +; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_1)(a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 +; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 ; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a1, 838861 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -819 -; LMULMAX1-RV64-NEXT: slli a3, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a3 -; LMULMAX1-RV64-NEXT: vmv.v.x v11, a1 -; LMULMAX1-RV64-NEXT: lui a1, 699051 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365 -; LMULMAX1-RV64-NEXT: slli a3, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a3 +; LMULMAX1-RV64-NEXT: lui a2, 838861 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV64-NEXT: lui a2, 699051 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = udiv <4 x i64> %a, @@ -5383,18 +5383,18 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a2, -123 -; LMULMAX2-RV32-NEXT: 
vmv.v.x v10, a2 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, 57 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: li a1, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: li a1, 57 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -5404,18 +5404,18 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a2, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, 57 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: li a1, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV64-NEXT: li a1, 57 +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -5658,16 +5658,16 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, 349525 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 ; LMULMAX2-RV64-NEXT: slli a2, a1, 32 ; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) ; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 ; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -7,13 +7,12 @@ define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr %p) { ; CHECK-LABEL: vector_interleave_store_v32i1_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 16 ; CHECK-NEXT: 
vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -16,10 +16,9 @@ ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vadd.vv v9, v8, v8 ; CHECK-NEXT: vrgather.vv v8, v10, v9 +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v10, 4 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t ; CHECK-NEXT: vadd.vi v11, v9, 1 @@ -656,15 +655,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 92 +; RV64-NEXT: li a3, 90 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xda, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 90 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 68 +; RV64-NEXT: li a3, 57 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -672,55 +671,62 @@ ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 84 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 6 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 73 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v16, 4 ; RV64-NEXT: li a1, 128 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vmv.v.x v1, a1 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v16, 8 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t ; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: li a1, 6 ; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: vmul.vx v16, v8, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 81 
+; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 56 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 60 +; RV64-NEXT: li a3, 73 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vv v16, v24, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 76 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v8, -16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16 +; RV64-NEXT: vadd.vi v8, v16, -16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 44 +; RV64-NEXT: li a3, 41 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -728,123 +734,120 @@ ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v16 +; RV64-NEXT: vmv.v.v v4, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 36 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v0, 5 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 5 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v24, 3, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; 
RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 1 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v16, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v0 -; RV64-NEXT: vadd.vi v24, v8, -15 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v0 +; RV64-NEXT: vadd.vi v8, v16, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v8, v24, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -853,396 +856,357 @@ ; RV64-NEXT: li a1, 24 ; RV64-NEXT: vadd.vi v8, v24, -14 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded 
Spill +; RV64-NEXT: vmv.v.x v2, a1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.i v12, 6 +; RV64-NEXT: vmv.v.i v8, 6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v8, zero -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v12, v8, 5 +; RV64-NEXT: vmv.s.x v4, zero +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vslideup.vi v8, v4, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v20, v0, v12 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v12, v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 4, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 3 +; RV64-NEXT: vadd.vi v16, v8, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v0 -; RV64-NEXT: vmv.v.v v24, v16 -; RV64-NEXT: vadd.vi v16, v8, -13 +; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: 
addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v8, -13 +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 1 -; RV64-NEXT: vmv.v.i v20, 7 +; RV64-NEXT: vmv.v.i v12, 7 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 20 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v16, a1 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v20, v8, 5 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslideup.vi v12, v16, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v20 +; RV64-NEXT: vrgather.vv v16, v24, v12 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 5, v0.t +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma +; 
RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v8, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v20, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: li a1, 192 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 28 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 68 +; RV64-NEXT: li a3, 81 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 2 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v16, v8, -12 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v20, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v24, 4 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v0 -; RV64-NEXT: li a1, 28 -; RV64-NEXT: vadd.vi v16, v24, -12 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v24, v4, 6 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: li a1, 192 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 57 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # 
Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v28, v8, 2 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v12, v28 +; RV64-NEXT: vrgather.vv v12, v8, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v8 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v4, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v24, v0, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vadd.vi v24, v0, -11 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v4, v0.t -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, 
a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v16, 5 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v0 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v24, v24, -11 +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v24, v8, 6 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v8, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: vrgather.vv v12, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v8 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 36 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v16, (a0) +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: addi a1, a0, 320 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 5 +; RV64-NEXT: add a2, a3, a2 ; 
RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 24 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: li a3, 25 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: addi a1, a0, 64 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 41 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: addi a0, a0, 64 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 92 +; RV64-NEXT: li a1, 90 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -361,9 +361,9 @@ ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) %ev = sext <2 x i8> %v to <2 x i64> @@ -422,11 +422,11 @@ ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a0, v9 -; RV64ZVE32F-NEXT: andi a1, a0, 255 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) %ev = zext <2 x i8> %v to <2 x i64> @@ -1079,9 +1079,9 @@ ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) %ev = sext <2 x i16> %v to <2 x i64> @@ -2145,9 +2145,9 @@ ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; 
RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) %ev = sext <2 x i32> %v to <2 x i64> @@ -2204,13 +2204,13 @@ ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a0, v9 -; RV64ZVE32F-NEXT: slli a0, a0, 32 -; RV64ZVE32F-NEXT: srli a1, a0, 32 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 32 ; RV64ZVE32F-NEXT: srli a0, a0, 32 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 32 +; RV64ZVE32F-NEXT: srli a1, a1, 32 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) %ev = zext <2 x i32> %v to <2 x i64> @@ -12355,26 +12355,24 @@ ; ; RV64V-LABEL: mgather_baseidx_v32i8: ; RV64V: # %bb.0: -; RV64V-NEXT: vmv1r.v v12, v0 -; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v14, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v14 +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vmv1r.v v12, v10 +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v14, v10, 16 +; RV64V-NEXT: vslidedown.vi v10, v10, 16 +; RV64V-NEXT: vslidedown.vi v8, v8, 16 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v14, (a0), v16, v0.t -; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-NEXT: vmv1r.v v0, v12 ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64V-NEXT: vslideup.vi v10, v14, 16 -; RV64V-NEXT: vmv.v.v v8, v10 +; RV64V-NEXT: vslideup.vi v12, v10, 16 +; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -10850,11 +10850,10 @@ ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vslidedown.vi v10, v10, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1793,20 +1793,20 @@ ; RV32-NEXT: 
.cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: lui a2, %hi(.LCPI72_0) -; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: lui a2, %hi(.LCPI72_0) +; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV32-NEXT: vle32.v v16, (a2) ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: vmsltu.vx v12, v16, a1 -; RV32-NEXT: vid.v v16 +; RV32-NEXT: vid.v v24 +; RV32-NEXT: vmsltu.vx v12, v24, a1 ; RV32-NEXT: vmsltu.vx v13, v16, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v13, v12, 4 +; RV32-NEXT: vslideup.vi v12, v13, 4 ; RV32-NEXT: li a0, 64 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-NEXT: vmand.mm v0, v13, v0 +; RV32-NEXT: vmand.mm v0, v12, v0 ; RV32-NEXT: vmv.v.i v12, 1 ; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vslidedown.vx v12, v8, a3 @@ -1836,20 +1836,20 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: lui a2, %hi(.LCPI72_0) -; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: lui a2, %hi(.LCPI72_0) +; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV64-NEXT: vle32.v v16, (a2) ; RV64-NEXT: mv a2, a0 -; RV64-NEXT: vmsltu.vx v12, v16, a1 -; RV64-NEXT: vid.v v16 +; RV64-NEXT: vid.v v24 +; RV64-NEXT: vmsltu.vx v12, v24, a1 ; RV64-NEXT: vmsltu.vx v13, v16, a1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v13, v12, 4 +; RV64-NEXT: vslideup.vi v12, v13, 4 ; RV64-NEXT: li a0, 64 ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-NEXT: vmand.mm v0, v13, v0 +; RV64-NEXT: vmand.mm v0, v12, v0 ; RV64-NEXT: vmv.v.i v12, 1 ; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: vslidedown.vx v12, v8, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -945,10 +945,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_add_v1i64: @@ -968,10 +968,10 @@ ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vsext.vf2 v9, v8 -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v8, v9, a0 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_add_v1i64: @@ -993,10 +993,10 @@ ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v8, v9, a0 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_uadd_v1i64: @@ -2130,10 +2130,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; 
RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v1i64: @@ -2155,11 +2155,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredand.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v2i64: @@ -2715,10 +2715,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v1i64: @@ -2740,11 +2740,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredor.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v2i64: @@ -3321,10 +3321,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_xor_v1i64: @@ -3918,10 +3918,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v1i64: @@ -3943,11 +3943,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmin.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v2i64: @@ -4503,10 +4503,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v1i64: @@ -4528,11 +4528,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmax.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v2i64: @@ -5088,10 +5088,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li 
a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v1i64: @@ -5113,11 +5113,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredminu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v2i64: @@ -5673,10 +5673,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v1i64: @@ -5698,11 +5698,11 @@ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmaxu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v2i64: @@ -6408,10 +6408,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_mul_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -246,8 +246,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -262,8 +262,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -512,8 +512,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -528,8 +528,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: 
vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -854,9 +854,9 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v8 +; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v12, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -870,9 +870,9 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v16 +; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1123,9 +1123,9 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v8 +; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v12, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1139,9 +1139,9 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v16 +; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -153,26 +153,26 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vsext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, 
ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsext.vf2 v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -181,22 +181,23 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vsext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8 -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -100,10 +100,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v32i1: ; NO-ZVBB: # %bb.0: +; NO-ZVBB-NEXT: li a0, 32 +; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; NO-ZVBB-NEXT: lui a0, %hi(.LCPI4_0) ; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI4_0) -; NO-ZVBB-NEXT: li a1, 32 -; NO-ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; NO-ZVBB-NEXT: vle8.v v8, (a0) ; NO-ZVBB-NEXT: vmv.v.i v10, 0 ; NO-ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 @@ -123,10 +123,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v64i1: ; NO-ZVBB: # %bb.0: +; NO-ZVBB-NEXT: li a0, 64 +; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; NO-ZVBB-NEXT: lui a0, %hi(.LCPI5_0) ; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI5_0) -; NO-ZVBB-NEXT: li a1, 64 -; NO-ZVBB-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; NO-ZVBB-NEXT: vle8.v v8, (a0) ; NO-ZVBB-NEXT: vmv.v.i v12, 0 ; NO-ZVBB-NEXT: vmerge.vim v12, v12, 1, v0 @@ -146,10 +146,10 @@ define <128 x i1> @reverse_v128i1(<128 x i1> %a) { ; CHECK-LABEL: reverse_v128i1: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; 
CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -308,23 +308,23 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -470,40 +470,41 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_load_v33f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a5, 32 +; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: bltu a4, a5, .LBB35_2 +; CHECK-RV32-NEXT: mv a5, a4 +; CHECK-RV32-NEXT: bltu a4, a3, .LBB35_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: li a3, 32 +; CHECK-RV32-NEXT: li a5, 32 ; CHECK-RV32-NEXT: .LBB35_2: -; CHECK-RV32-NEXT: mul a5, a3, a2 -; CHECK-RV32-NEXT: addi a6, a4, -32 -; CHECK-RV32-NEXT: sltu a4, a4, a6 -; CHECK-RV32-NEXT: addi a4, a4, -1 -; CHECK-RV32-NEXT: and a6, a4, a6 -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: add a5, a1, a5 -; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4 -; CHECK-RV32-NEXT: # %bb.3: +; CHECK-RV32-NEXT: addi a3, a5, -16 +; CHECK-RV32-NEXT: sltu a6, a5, a3 +; CHECK-RV32-NEXT: addi a7, a6, -1 ; CHECK-RV32-NEXT: li a6, 16 -; CHECK-RV32-NEXT: .LBB35_4: -; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV32-NEXT: addi a5, a3, -16 -; CHECK-RV32-NEXT: sltu a6, a3, a5 -; CHECK-RV32-NEXT: addi a6, a6, -1 -; CHECK-RV32-NEXT: and a5, a6, a5 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6 -; CHECK-RV32-NEXT: # %bb.5: +; CHECK-RV32-NEXT: and a7, a7, a3 +; CHECK-RV32-NEXT: mv a3, a5 +; CHECK-RV32-NEXT: bltu a5, a6, .LBB35_4 +; CHECK-RV32-NEXT: # 
%bb.3: ; CHECK-RV32-NEXT: li a3, 16 -; CHECK-RV32-NEXT: .LBB35_6: -; CHECK-RV32-NEXT: mul a4, a3, a2 -; CHECK-RV32-NEXT: add a4, a1, a4 +; CHECK-RV32-NEXT: .LBB35_4: +; CHECK-RV32-NEXT: mul t0, a3, a2 +; CHECK-RV32-NEXT: add t0, a1, t0 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (t0), a2, v0.t +; CHECK-RV32-NEXT: mul a7, a5, a2 +; CHECK-RV32-NEXT: addi a5, a4, -32 +; CHECK-RV32-NEXT: sltu a4, a4, a5 +; CHECK-RV32-NEXT: addi a4, a4, -1 +; CHECK-RV32-NEXT: and a5, a4, a5 +; CHECK-RV32-NEXT: add a4, a1, a7 +; CHECK-RV32-NEXT: bltu a5, a6, .LBB35_6 +; CHECK-RV32-NEXT: # %bb.5: +; CHECK-RV32-NEXT: li a5, 16 +; CHECK-RV32-NEXT: .LBB35_6: +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -513,48 +514,49 @@ ; CHECK-RV32-NEXT: vse64.v v8, (a0) ; CHECK-RV32-NEXT: addi a1, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v16, (a1) +; CHECK-RV32-NEXT: vse64.v v24, (a1) ; CHECK-RV32-NEXT: addi a0, a0, 128 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v24, (a0) +; CHECK-RV32-NEXT: vse64.v v16, (a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a5, 32 +; CHECK-RV64-NEXT: li a4, 32 ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: mv a4, a3 -; CHECK-RV64-NEXT: bltu a3, a5, .LBB35_2 +; CHECK-RV64-NEXT: mv a5, a3 +; CHECK-RV64-NEXT: bltu a3, a4, .LBB35_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: li a4, 32 +; CHECK-RV64-NEXT: li a5, 32 ; CHECK-RV64-NEXT: .LBB35_2: -; CHECK-RV64-NEXT: mul a5, a4, a2 -; CHECK-RV64-NEXT: addi a6, a3, -32 -; CHECK-RV64-NEXT: sltu a3, a3, a6 -; CHECK-RV64-NEXT: addi a3, a3, -1 -; CHECK-RV64-NEXT: and a6, a3, a6 -; CHECK-RV64-NEXT: li a3, 16 -; CHECK-RV64-NEXT: add a5, a1, a5 -; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4 -; CHECK-RV64-NEXT: # %bb.3: +; CHECK-RV64-NEXT: addi a4, a5, -16 +; CHECK-RV64-NEXT: sltu a6, a5, a4 +; CHECK-RV64-NEXT: addi a7, a6, -1 ; CHECK-RV64-NEXT: li a6, 16 -; CHECK-RV64-NEXT: .LBB35_4: -; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV64-NEXT: addi a5, a4, -16 -; CHECK-RV64-NEXT: sltu a6, a4, a5 -; CHECK-RV64-NEXT: addi a6, a6, -1 -; CHECK-RV64-NEXT: and a5, a6, a5 -; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6 -; CHECK-RV64-NEXT: # %bb.5: +; CHECK-RV64-NEXT: and a7, a7, a4 +; CHECK-RV64-NEXT: mv a4, a5 +; CHECK-RV64-NEXT: bltu a5, a6, .LBB35_4 +; CHECK-RV64-NEXT: # %bb.3: ; CHECK-RV64-NEXT: li a4, 16 -; CHECK-RV64-NEXT: .LBB35_6: -; CHECK-RV64-NEXT: mul a3, a4, a2 -; CHECK-RV64-NEXT: add a3, a1, a3 +; CHECK-RV64-NEXT: .LBB35_4: +; CHECK-RV64-NEXT: mul t0, a4, a2 +; CHECK-RV64-NEXT: add t0, a1, t0 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (t0), a2, v0.t +; CHECK-RV64-NEXT: mul a7, a5, a2 +; CHECK-RV64-NEXT: addi a5, a3, -32 +; CHECK-RV64-NEXT: sltu a3, a3, a5 +; CHECK-RV64-NEXT: addi a3, a3, -1 +; CHECK-RV64-NEXT: and a5, a3, a5 +; CHECK-RV64-NEXT: add a3, a1, a7 +; 
CHECK-RV64-NEXT: bltu a5, a6, .LBB35_6 +; CHECK-RV64-NEXT: # %bb.5: +; CHECK-RV64-NEXT: li a5, 16 +; CHECK-RV64-NEXT: .LBB35_6: +; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma @@ -564,10 +566,10 @@ ; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: addi a1, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v16, (a1) +; CHECK-RV64-NEXT: vse64.v v24, (a1) ; CHECK-RV64-NEXT: addi a0, a0, 128 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v24, (a0) +; CHECK-RV64-NEXT: vse64.v v16, (a0) ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -53,27 +53,27 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v128i7_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a1, a0, -64 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vslidedown.vi v12, v0, 8 +; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a1, .LBB4_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: addi a2, a0, -64 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a1 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vx v8, v24, a1 ; CHECK-NEXT: ret %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) ret <128 x i7> %v @@ -227,31 +227,30 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 6 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add 
a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v3, v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v2, v0, 4 +; CHECK-NEXT: vslidedown.vi v26, v0, 4 ; CHECK-NEXT: addi a2, a1, 512 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 48 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 @@ -278,7 +277,7 @@ ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -289,139 +288,164 @@ ; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v4, v2, 2 +; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) +; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vmv1r.v v0, v27 ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: li a6, 56 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a7, 64 ; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: addi a5, a1, 384 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a5, a7, -32 -; CHECK-NEXT: sltu a6, a7, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: addi a6, a5, -16 -; CHECK-NEXT: sltu t0, a5, a6 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: li t0, 40 +; CHECK-NEXT: mul a6, a6, t0 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: addi a6, a7, -32 +; CHECK-NEXT: sltu t0, a7, a6 ; CHECK-NEXT: addi t0, t0, -1 ; CHECK-NEXT: and a6, t0, a6 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: addi t0, a6, -16 +; CHECK-NEXT: sltu t1, a6, t0 +; CHECK-NEXT: addi t1, t1, -1 +; CHECK-NEXT: and t0, t1, t0 +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: addi t0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: addi a6, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a5, a2, .LBB16_6 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a6, a2, .LBB16_6 
; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a5, 16 +; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: addi a6, a1, 384 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v20, v3, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a5) ; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 40 -; CHECK-NEXT: mul a5, a5, t0 +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB16_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v3, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a6) ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: addi a1, a4, -16 +; CHECK-NEXT: sltu a5, a4, a1 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a1, a5, a1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: bltu a4, a2, .LBB16_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB16_10: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v2, v1, 2 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v3 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a4, -16 -; CHECK-NEXT: sltu a4, a4, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: bltu a7, a3, .LBB16_12 ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: li a7, 32 ; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: li a4, 56 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: li a4, 56 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi 
a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: vmv4r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v24, 16 +; CHECK-NEXT: vslideup.vi v8, v16, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -432,7 +456,8 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -444,25 +469,25 @@ ; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vse32.v v24, (a0) +; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vse32.v v16, (a0) ; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 48 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 @@ -470,15 +495,14 @@ ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a0, a0, 384 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 48 +; CHECK-NEXT: li a2, 56 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 6 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -491,27 +515,27 @@ define <32 x i32> 
@vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v32i32_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB17_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: ret %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl) ret <32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -308,23 +308,23 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1528,47 +1528,47 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; 
RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB108_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: li a1, 16 -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: bltu a0, a1, .LBB108_2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB108_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer @@ -1649,17 +1649,16 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-LABEL: vadd_vx_v32i64_evl27: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64_evl27: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -324,46 +324,37 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr 
a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -321,23 +321,23 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfabs.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> 
@llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll @@ -3227,10 +3227,10 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vv v16, v8, v8 +; CHECK-NEXT: vmfle.vf v8, v12, fa0 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v16 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -657,93 +657,75 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: mv a0, a4 +; 
CHECK-NEXT: bltu a4, a1, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -324,46 +324,37 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -324,46 +324,37 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi 
a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -657,93 +657,75 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: bltu 
a4, a1, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -321,23 +321,23 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfneg.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; 
CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfneg.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfneg.v v8, v8, v0.t +; CHECK-NEXT: vfneg.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -321,23 +321,23 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: vfsqrt.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -1091,48 +1091,48 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmax.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmax.vv v8, v8, v24, v0.t +; RV32-NEXT: vmax.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmax_vx_v32i64: ; RV64: # %bb.0: -; 
RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmax.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmax.vx v8, v8, a1, v0.t +; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -1090,48 +1090,48 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t +; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmaxu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t 
+; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -1091,48 +1091,48 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmin.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmin.vv v8, v8, v24, v0.t +; RV32-NEXT: vmin.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmin_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmin.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmin.vx v8, v8, a1, v0.t +; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -1090,48 +1090,48 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; 
RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vminu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vminu.vv v8, v8, v24, v0.t +; RV32-NEXT: vminu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vminu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vminu.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vminu.vx v8, v8, a1, v0.t +; RV64-NEXT: vminu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -285,32 +285,32 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB13_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB13_2: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB13_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB13_2: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: 
vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v12, 16 +; RV64-NEXT: vslideup.vi v10, v8, 16 +; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -1890,47 +1890,45 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB86_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB86_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t ; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v8, 16 +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t -; RV32-NEXT: li a1, 16 -; RV32-NEXT: bltu a0, a1, .LBB86_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB86_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v24 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB86_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB86_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t ; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t -; RV64-NEXT: li a1, 16 -; RV64-NEXT: bltu a0, a1, .LBB86_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB86_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) ret <32 x double> %v @@ -1951,12 +1949,12 @@ ; RV32-NEXT: .LBB87_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1965,31 +1963,29 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; 
RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v10, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB87_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB87_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB87_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB87_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2011,12 +2007,12 @@ ; RV32-NEXT: .LBB88_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2025,30 +2021,30 @@ ; ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB88_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB88_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB88_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB88_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, 
v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2071,12 +2067,12 @@ ; RV32-NEXT: .LBB89_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2085,30 +2081,30 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 ; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vzext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB89_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB89_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB89_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2131,12 +2127,12 @@ ; RV32-NEXT: .LBB90_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2145,31 +2141,29 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf4 v24, v8 +; 
RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB90_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB90_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB90_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB90_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2191,12 +2185,12 @@ ; RV32-NEXT: .LBB91_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2205,30 +2199,30 @@ ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v0, v16 ; RV64-NEXT: vsext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB91_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB91_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB91_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2251,12 +2245,12 @@ ; RV32-NEXT: .LBB92_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 
-; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2265,30 +2259,30 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v0, v16 ; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB92_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB92_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB92_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2310,12 +2304,12 @@ ; RV32-NEXT: .LBB93_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2324,45 +2318,29 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, 
v16, 3 +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB93_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB93_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB93_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2383,12 +2361,12 @@ ; RV32-NEXT: .LBB94_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2397,30 +2375,30 @@ ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v0, v16 ; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB94_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB94_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB94_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, 
ma +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2442,12 +2420,12 @@ ; RV32-NEXT: .LBB95_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2456,30 +2434,30 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vzext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v0, v16 ; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB95_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB95_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB95_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2519,26 +2497,25 @@ ; ; RV64-LABEL: vpgather_baseidx_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB96_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB96_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB96_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB96_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 
-; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -381,24 +381,23 @@ define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bltu a1, a3, .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: sltu a3, a1, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a3, a0, 128 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a3), v0.t -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: bltu a1, a2, .LBB31_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB31_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <32 x double> @llvm.vp.load.v32f64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) ret <32 x double> %load @@ -422,9 +421,9 @@ ; CHECK-NEXT: sltu a5, a3, a4 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a5), v0.t ; CHECK-NEXT: addi a4, a2, -32 @@ -436,9 +435,9 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a3, a2, .LBB32_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1065,41 +1065,41 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: bltu a2, a0, .LBB79_2 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB79_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB79_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -1112,23 +1112,22 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vf_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 -; CHECK-NEXT: bltu a0, a1, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <32 x double> poison, double %a, i32 0 %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1701,12 +1701,12 @@ ; RV32-NEXT: .LBB79_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t -; RV32-NEXT: vsetivli zero, 
16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a0, a1, -16 ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1769,12 +1769,12 @@ ; RV32-NEXT: .LBB80_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1854,12 +1854,12 @@ ; RV32-NEXT: .LBB81_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1941,12 +1941,12 @@ ; RV32-NEXT: .LBB82_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -297,9 +297,9 @@ ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll @@ -1832,18 +1832,19 @@ define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 -; CHECK-RV32-NEXT: li a0, 63 -; 
CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v9, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v2i64: @@ -1867,18 +1868,19 @@ define <2 x i64> @vror_vi_rotl_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v9, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v2i64: @@ -2002,18 +2004,19 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vsub.vx v12, v12, a1 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v4i64: @@ -2037,18 +2040,19 @@ define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vsub.vx v12, 
v12, a1 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v4i64: @@ -2172,18 +2176,19 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v16, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v8i64: @@ -2207,18 +2212,19 @@ define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v16, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -403,46 +403,35 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; 
CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a0, .LBB25_2 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -453,42 +442,15 @@ define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> 
%b, <32 x i64> %c, i32 17) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -153,26 +153,26 @@ define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vzext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v24, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vzext.vf2 v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -181,22 +181,23 @@ define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vzext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v24, v8 -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -456,9 +456,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, 
-24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -478,22 +478,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 @@ -503,7 +517,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -614,9 +628,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -636,22 +650,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; 
CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -660,7 +688,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -781,9 +809,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -803,22 +831,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -828,7 +870,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; 
CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2209,59 +2251,58 @@ ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB18_3 +; CHECK-V-NEXT: beqz a1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB18_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: beqz a4, .LBB18_5 +; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: bnez a4, .LBB18_6 +; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry -; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: neg a4, a5 -; CHECK-V-NEXT: bnez a5, .LBB18_8 +; CHECK-V-NEXT: neg a6, a5 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB18_11 +; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # %bb.9: # %entry -; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: slti a3, a5, 0 ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB18_13 +; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j .LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; CHECK-V-NEXT: bnez a3, .LBB18_14 +; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry -; CHECK-V-NEXT: bnez a2, .LBB18_16 +; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2341,15 +2382,15 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2446,41 +2487,41 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: 
mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: blez a1, .LBB20_2 +; CHECK-V-NEXT: mv a2, s1 +; CHECK-V-NEXT: blez s1, .LBB20_2 ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: slti a1, a1, 1 -; CHECK-V-NEXT: blez s0, .LBB20_4 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: blez a1, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 +; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry ; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: neg a1, a1 -; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: beqz a2, .LBB20_7 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: beqz a1, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: bnez s0, .LBB20_8 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: bnez a2, .LBB20_8 ; CHECK-V-NEXT: .LBB20_6: ; CHECK-V-NEXT: snez a2, a3 ; CHECK-V-NEXT: j .LBB20_9 ; CHECK-V-NEXT: .LBB20_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: beqz s0, .LBB20_6 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: beqz a2, .LBB20_6 ; CHECK-V-NEXT: .LBB20_8: # %entry -; CHECK-V-NEXT: sgtz a2, s0 +; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: .LBB20_9: # %entry ; CHECK-V-NEXT: neg a2, a2 ; CHECK-V-NEXT: and a2, a2, a3 @@ -2617,59 +2658,58 @@ ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB21_3 +; CHECK-V-NEXT: beqz a1, .LBB21_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB21_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: -; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: beqz a4, .LBB21_5 +; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: beqz a5, .LBB21_5 ; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB21_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: bnez a4, .LBB21_6 +; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: bnez a5, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB21_6: # %entry -; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: neg a4, a5 -; CHECK-V-NEXT: bnez a5, .LBB21_8 +; CHECK-V-NEXT: neg a6, a5 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: bnez a4, .LBB21_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB21_11 +; CHECK-V-NEXT: beq a5, a2, .LBB21_11 ; CHECK-V-NEXT: # %bb.9: # %entry -; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: slti a3, a5, 0 ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12 ; CHECK-V-NEXT: .LBB21_10: -; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB21_13 +; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: beqz a2, .LBB21_13 ; CHECK-V-NEXT: j 
.LBB21_14 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10 ; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; CHECK-V-NEXT: bnez a3, .LBB21_14 +; CHECK-V-NEXT: bnez a2, .LBB21_14 ; CHECK-V-NEXT: .LBB21_13: # %entry ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB21_14: # %entry -; CHECK-V-NEXT: bnez a2, .LBB21_16 +; CHECK-V-NEXT: bnez a3, .LBB21_16 ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2749,15 +2789,15 @@ ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2854,41 +2894,41 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: blez a1, .LBB23_2 +; CHECK-V-NEXT: mv a2, s1 +; CHECK-V-NEXT: blez s1, .LBB23_2 ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: slti a1, a1, 1 -; CHECK-V-NEXT: blez s0, .LBB23_4 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: blez a1, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 +; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry ; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: neg a1, a1 -; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: beqz a2, .LBB23_7 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: beqz a1, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: bnez s0, .LBB23_8 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: bnez a2, .LBB23_8 ; CHECK-V-NEXT: .LBB23_6: ; CHECK-V-NEXT: snez a2, a3 ; CHECK-V-NEXT: j .LBB23_9 ; CHECK-V-NEXT: .LBB23_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: beqz s0, .LBB23_6 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: beqz a2, .LBB23_6 ; CHECK-V-NEXT: .LBB23_8: # %entry -; CHECK-V-NEXT: sgtz a2, s0 +; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: .LBB23_9: # %entry ; CHECK-V-NEXT: neg a2, a2 ; CHECK-V-NEXT: and a2, a2, a3 @@ -3752,9 +3792,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; 
CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -3774,22 +3814,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 @@ -3799,7 +3853,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3908,9 +3962,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -3930,22 +3984,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: 
vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -3954,7 +4022,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4074,9 +4142,9 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -4096,22 +4164,36 @@ ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -4121,7 +4203,7 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -5486,62 +5568,61 @@ ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 
-; CHECK-V-NEXT: beqz s1, .LBB45_2 +; CHECK-V-NEXT: beqz a1, .LBB45_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: slti a4, a1, 0 ; CHECK-V-NEXT: beqz a4, .LBB45_3 ; CHECK-V-NEXT: j .LBB45_4 ; CHECK-V-NEXT: .LBB45_2: -; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: sltu a4, a0, a3 ; CHECK-V-NEXT: bnez a4, .LBB45_4 ; CHECK-V-NEXT: .LBB45_3: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB45_4: # %entry -; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: beqz a1, .LBB45_6 +; CHECK-V-NEXT: beqz s1, .LBB45_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: beqz a4, .LBB45_7 -; CHECK-V-NEXT: j .LBB45_8 +; CHECK-V-NEXT: slti a6, s1, 0 +; CHECK-V-NEXT: j .LBB45_7 ; CHECK-V-NEXT: .LBB45_6: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: bnez a4, .LBB45_8 +; CHECK-V-NEXT: sltu a6, s0, a3 ; CHECK-V-NEXT: .LBB45_7: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB45_8: # %entry +; CHECK-V-NEXT: neg a5, a6 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a6, .LBB45_9 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB45_9: # %entry ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq a5, a2, .LBB45_10 -; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: beq a5, a2, .LBB45_11 +; CHECK-V-NEXT: # %bb.10: # %entry ; CHECK-V-NEXT: slti a5, a5, 0 ; CHECK-V-NEXT: xori a5, a5, 1 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: beqz a5, .LBB45_11 -; CHECK-V-NEXT: j .LBB45_12 -; CHECK-V-NEXT: .LBB45_10: +; CHECK-V-NEXT: beqz a5, .LBB45_12 +; CHECK-V-NEXT: j .LBB45_13 +; CHECK-V-NEXT: .LBB45_11: ; CHECK-V-NEXT: sltu a5, a3, s0 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: bnez a5, .LBB45_12 -; CHECK-V-NEXT: .LBB45_11: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: bnez a5, .LBB45_13 ; CHECK-V-NEXT: .LBB45_12: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB45_14 -; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB45_13: # %entry +; CHECK-V-NEXT: beq a1, a2, .LBB45_15 +; CHECK-V-NEXT: # %bb.14: # %entry ; CHECK-V-NEXT: slti a1, a1, 0 ; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: beqz a1, .LBB45_15 -; CHECK-V-NEXT: j .LBB45_16 -; CHECK-V-NEXT: .LBB45_14: +; CHECK-V-NEXT: beqz a1, .LBB45_16 +; CHECK-V-NEXT: j .LBB45_17 +; CHECK-V-NEXT: .LBB45_15: ; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: bnez a1, .LBB45_16 -; CHECK-V-NEXT: .LBB45_15: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: bnez a1, .LBB45_17 ; CHECK-V-NEXT: .LBB45_16: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB45_17: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5608,26 +5689,26 @@ ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, 
ma +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v8, a1 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5712,8 +5793,8 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload @@ -5724,19 +5805,20 @@ ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB47_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: .LBB47_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: blez s0, .LBB47_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: .LBB47_4: # %entry -; CHECK-V-NEXT: slti a1, s0, 0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a1, s1, 1 +; CHECK-V-NEXT: neg a1, a1 +; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 @@ -5872,62 +5954,61 @@ ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB48_2 +; CHECK-V-NEXT: beqz a1, .LBB48_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: slti a4, a1, 0 ; CHECK-V-NEXT: beqz a4, .LBB48_3 ; CHECK-V-NEXT: j .LBB48_4 ; CHECK-V-NEXT: .LBB48_2: -; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: sltu a4, a0, a3 ; CHECK-V-NEXT: bnez a4, .LBB48_4 ; CHECK-V-NEXT: .LBB48_3: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB48_4: # %entry -; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: beqz a1, .LBB48_6 +; CHECK-V-NEXT: beqz s1, .LBB48_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: beqz a4, .LBB48_7 -; CHECK-V-NEXT: j .LBB48_8 +; CHECK-V-NEXT: slti a6, s1, 0 +; CHECK-V-NEXT: j .LBB48_7 ; CHECK-V-NEXT: .LBB48_6: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: bnez a4, .LBB48_8 +; CHECK-V-NEXT: sltu a6, s0, a3 ; CHECK-V-NEXT: .LBB48_7: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB48_8: # %entry +; CHECK-V-NEXT: neg a5, a6 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a6, .LBB48_9 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB48_9: # %entry ; CHECK-V-NEXT: neg a4, 
a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq a5, a2, .LBB48_10 -; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: beq a5, a2, .LBB48_11 +; CHECK-V-NEXT: # %bb.10: # %entry ; CHECK-V-NEXT: slti a5, a5, 0 ; CHECK-V-NEXT: xori a5, a5, 1 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: beqz a5, .LBB48_11 -; CHECK-V-NEXT: j .LBB48_12 -; CHECK-V-NEXT: .LBB48_10: +; CHECK-V-NEXT: beqz a5, .LBB48_12 +; CHECK-V-NEXT: j .LBB48_13 +; CHECK-V-NEXT: .LBB48_11: ; CHECK-V-NEXT: sltu a5, a3, s0 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: bnez a5, .LBB48_12 -; CHECK-V-NEXT: .LBB48_11: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: bnez a5, .LBB48_13 ; CHECK-V-NEXT: .LBB48_12: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB48_14 -; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB48_13: # %entry +; CHECK-V-NEXT: beq a1, a2, .LBB48_15 +; CHECK-V-NEXT: # %bb.14: # %entry ; CHECK-V-NEXT: slti a1, a1, 0 ; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: beqz a1, .LBB48_15 -; CHECK-V-NEXT: j .LBB48_16 -; CHECK-V-NEXT: .LBB48_14: +; CHECK-V-NEXT: beqz a1, .LBB48_16 +; CHECK-V-NEXT: j .LBB48_17 +; CHECK-V-NEXT: .LBB48_15: ; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: bnez a1, .LBB48_16 -; CHECK-V-NEXT: .LBB48_15: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: bnez a1, .LBB48_17 ; CHECK-V-NEXT: .LBB48_16: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB48_17: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5994,26 +6075,26 @@ ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v8, a1 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -6098,8 +6179,8 @@ ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload @@ -6110,19 +6191,20 @@ ; CHECK-V-NEXT: # %bb.1: # %entry ; 
CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB50_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: .LBB50_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: blez s0, .LBB50_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: .LBB50_4: # %entry -; CHECK-V-NEXT: slti a1, s0, 0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a1, s1, 1 +; CHECK-V-NEXT: neg a1, a1 +; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -53,11 +53,10 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) @@ -67,11 +66,10 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) @@ -82,8 +80,8 @@ ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -95,8 +93,8 @@ ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -117,11 +115,10 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) @@ -131,11 +128,10 @@ define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, 
zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) @@ -145,11 +141,10 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) @@ -241,8 +236,8 @@ ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -254,8 +249,8 @@ ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -267,8 +262,8 @@ ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -304,11 +299,10 @@ ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -320,11 +314,10 @@ ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -53,11 +53,10 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w 
v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) @@ -67,11 +66,10 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) @@ -82,8 +80,8 @@ ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -95,8 +93,8 @@ ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -117,11 +115,10 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) @@ -131,11 +128,10 @@ define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) @@ -145,11 +141,10 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) @@ -259,8 +254,8 @@ ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -272,8 +267,8 @@ ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, 
v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -285,8 +280,8 @@ ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -322,11 +317,10 @@ ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -338,11 +332,10 @@ ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -966,8 +966,7 @@ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -978,9 +977,6 @@ ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) @@ -989,17 +985,22 @@ ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: add a5, a2, a3 +; CHECK-NEXT: sub a3, a4, a1 +; CHECK-NEXT: sltu a6, a4, a3 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: add a3, a2, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: and a6, a6, a3 +; CHECK-NEXT: li a3, 63 +; CHECK-NEXT: vl8re64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a0, a0, a5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -1010,109 +1011,95 @@ ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; 
CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: li a0, 63 -; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v16, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a1, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 @@ -1122,14 +1109,14 @@ ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; 
CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1156,8 +1143,7 @@ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1168,25 +1154,27 @@ ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 +; CHECK-NEXT: li a6, 24 +; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: add a5, a2, a3 +; CHECK-NEXT: sub a3, a4, a1 +; CHECK-NEXT: sltu a6, a4, a3 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: add a3, a2, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: and a6, a6, a3 +; CHECK-NEXT: li a3, 63 +; CHECK-NEXT: vl8re64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 @@ -1200,107 +1188,97 @@ ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: li a0, 63 -; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v16, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsrl.vi v16, v16, 1, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a1, .LBB47_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB47_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: vand.vx v16, v8, a3, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll b/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll --- a/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll +++ b/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll @@ -12,8 +12,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[DEF]], [[COPY2]], $v0, [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) + ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK $noreg, [[COPY2]], $v0, [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) ; CHECK-NEXT: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8m8 %load = call @llvm.vp.load.nxv8i64.p0(* %ptr, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -438,14 +438,14 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 3 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vsetvli 
a0, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -28,9 +28,9 @@ define @insertelt_nxv1f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -62,9 +62,9 @@ define @insertelt_nxv2f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -96,9 +96,9 @@ define @insertelt_nxv4f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -232,9 +232,9 @@ define @insertelt_nxv1f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -266,9 +266,9 @@ define @insertelt_nxv2f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -402,9 +402,9 @@ define @insertelt_nxv1f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -25,9 +25,9 @@ ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -60,9 +60,9 @@ ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, 
ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -95,9 +95,9 @@ ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -130,9 +130,9 @@ ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -26,10 +26,10 @@ define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -60,10 +60,10 @@ define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -94,10 +94,10 @@ define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -128,10 +128,10 @@ define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = 
insertelement %v, i8 %elt, i32 %idx @@ -264,10 +264,10 @@ define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -298,10 +298,10 @@ define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -332,10 +332,10 @@ define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -468,10 +468,10 @@ define @insertelt_nxv1i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -502,10 +502,10 @@ define @insertelt_nxv2i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -26,10 +26,10 @@ define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -60,10 +60,10 @@ define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: 
insertelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -94,10 +94,10 @@ define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -128,10 +128,10 @@ define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -264,10 +264,10 @@ define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -298,10 +298,10 @@ define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -332,10 +332,10 @@ define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -468,10 +468,10 @@ define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, 
a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -502,10 +502,10 @@ define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -169,19 +169,19 @@ define void @memset_32(ptr %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: memset_32: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 16 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: vse8.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: memset_32: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 16 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: vse8.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0) @@ -191,10 +191,10 @@ define void @memset_64(ptr %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: memset_64: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 48 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 48 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: addi a1, a0, 32 ; RV32-BOTH-NEXT: vse8.v v8, (a1) ; RV32-BOTH-NEXT: addi a1, a0, 16 @@ -204,10 +204,10 @@ ; ; RV64-BOTH-LABEL: memset_64: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 48 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 48 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: addi a1, a0, 32 ; RV64-BOTH-NEXT: vse8.v v8, (a1) ; RV64-BOTH-NEXT: addi a1, a0, 16 @@ -309,19 +309,19 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: aligned_memset_32: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 16 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: vse8.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_32: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 16 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: vse8.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0) @@ -331,10 +331,10 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: aligned_memset_64: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 48 ; RV32-BOTH-NEXT: vsetivli 
zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 48 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: addi a1, a0, 32 ; RV32-BOTH-NEXT: vse8.v v8, (a1) ; RV32-BOTH-NEXT: addi a1, a0, 16 @@ -344,10 +344,10 @@ ; ; RV64-BOTH-LABEL: aligned_memset_64: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 48 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 48 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: addi a1, a0, 32 ; RV64-BOTH-NEXT: vse8.v v8, (a1) ; RV64-BOTH-NEXT: addi a1, a0, 16 @@ -504,37 +504,37 @@ define void @bzero_32(ptr %a) nounwind { ; RV32-LABEL: bzero_32: ; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 16 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse8.v v8, (a1) +; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_32: ; RV64: # %bb.0: -; RV64-NEXT: addi a1, a0, 16 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vse8.v v8, (a1) +; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_32: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: addi a1, a0, 16 ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vmv.v.i v8, 0 -; RV32-FAST-NEXT: vse64.v v8, (a1) +; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 16 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_32: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: addi a1, a0, 16 ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vmv.v.i v8, 0 -; RV64-FAST-NEXT: vse64.v v8, (a1) +; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: addi a0, a0, 16 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0) @@ -642,19 +642,19 @@ define void @aligned_bzero_32(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_32: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: addi a1, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 -; RV32-BOTH-NEXT: vse64.v v8, (a1) +; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 16 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_32: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: addi a1, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 -; RV64-BOTH-NEXT: vse64.v v8, (a1) +; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 16 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0) @@ -702,27 +702,27 @@ define void @aligned_bzero_96(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_96: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-BOTH-NEXT: vmv.v.i v8, 0 +; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: addi a1, a0, 80 ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 ; RV32-BOTH-NEXT: vse64.v v8, (a1) -; RV32-BOTH-NEXT: addi a1, a0, 64 -; RV32-BOTH-NEXT: vse64.v v8, (a1) -; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-BOTH-NEXT: vmv.v.i v8, 0 +; RV32-BOTH-NEXT: addi a0, a0, 64 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; 
RV64-BOTH-LABEL: aligned_bzero_96: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-BOTH-NEXT: vmv.v.i v8, 0 +; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: addi a1, a0, 80 ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 ; RV64-BOTH-NEXT: vse64.v v8, (a1) -; RV64-BOTH-NEXT: addi a1, a0, 64 -; RV64-BOTH-NEXT: vse64.v v8, (a1) -; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-BOTH-NEXT: vmv.v.i v8, 0 +; RV64-BOTH-NEXT: addi a0, a0, 64 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0) @@ -750,19 +750,19 @@ define void @aligned_bzero_256(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_256: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: addi a1, a0, 128 ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 -; RV32-BOTH-NEXT: vse64.v v8, (a1) +; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 128 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_256: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: addi a1, a0, 128 ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 -; RV64-BOTH-NEXT: vse64.v v8, (a1) +; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 128 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 256, i1 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2080,7 +2080,10 @@ ; ; RV64-LABEL: mgather_baseidx_nxv16i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma @@ -2089,11 +2092,6 @@ ; RV64-NEXT: vsext.vf8 v16, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: vmv2r.v v8, v10 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs @@ -2106,49 +2104,45 @@ define @mgather_baseidx_nxv32i8(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv32i8: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vsext.vf4 v16, v8 +; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v24, v10 -; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vluxei32.v v14, (a0), v24, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v24, v8 +; RV32-NEXT: vsext.vf4 v16, v10 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vmv1r.v v0, v16 -; RV32-NEXT: vluxei32.v v12, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v14, (a0), v16, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: 
ret ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v16, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 2 -; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v17, v0, a2 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v10 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v17 -; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t -; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v16, a1 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a2, a1, 3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v0, v16, a1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v17, a1 +; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -16,11 +16,11 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -33,10 +33,10 @@ ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -48,10 +48,10 @@ ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: 
vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -63,11 +63,11 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -80,10 +80,10 @@ ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -95,10 +95,10 @@ ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -114,11 +114,11 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -131,10 +131,10 @@ ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -146,10 +146,10 @@ ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -161,11 +161,11 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; 
RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -178,10 +178,10 @@ ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -193,10 +193,10 @@ ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -209,86 +209,88 @@ define @reverse_nxv8i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v10 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv8i1: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-256-NEXT: vmv.v.i v8, 0 +; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: addi a0, a0, -1 -; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV32-BITS-256-NEXT: vid.v v8 -; RV32-BITS-256-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-256-NEXT: vmv.v.i v9, 0 -; RV32-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-BITS-256-NEXT: vrgather.vv v10, v9, v8 +; RV32-BITS-256-NEXT: vid.v v9 +; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 +; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv8i1: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 +; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: addi a0, a0, -1 -; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 -; 
RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-512-NEXT: vmv.v.i v9, 0 -; RV32-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-BITS-512-NEXT: vrgather.vv v10, v9, v8 +; RV32-BITS-512-NEXT: vid.v v9 +; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 +; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v10 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv8i1: ; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-256-NEXT: vmv.v.i v8, 0 +; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: addi a0, a0, -1 -; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV64-BITS-256-NEXT: vid.v v8 -; RV64-BITS-256-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-256-NEXT: vmv.v.i v9, 0 -; RV64-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-BITS-256-NEXT: vrgather.vv v10, v9, v8 +; RV64-BITS-256-NEXT: vid.v v9 +; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 +; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv8i1: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 +; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: addi a0, a0, -1 -; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV64-BITS-512-NEXT: vid.v v8 -; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-512-NEXT: vmv.v.i v9, 0 -; RV64-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-BITS-512-NEXT: vrgather.vv v10, v9, v8 +; RV64-BITS-512-NEXT: vid.v v9 +; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 +; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret @@ -302,11 +304,11 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; 
RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -319,10 +321,10 @@ ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v10 ; RV32-BITS-256-NEXT: vand.vi v8, v12, 1 @@ -334,10 +336,10 @@ ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v10 ; RV32-BITS-512-NEXT: vand.vi v8, v12, 1 @@ -349,11 +351,11 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -366,10 +368,10 @@ ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10 ; RV64-BITS-256-NEXT: vand.vi v8, v12, 1 @@ -381,10 +383,10 @@ ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10 ; RV64-BITS-512-NEXT: vand.vi v8, v12, 1 @@ -400,11 +402,11 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -417,10 +419,10 @@ ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; 
RV32-BITS-256-NEXT: slli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v12 ; RV32-BITS-256-NEXT: vand.vi v8, v16, 1 @@ -432,10 +434,10 @@ ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-512-NEXT: vrgather.vv v16, v8, v12 ; RV32-BITS-512-NEXT: vand.vi v8, v16, 1 @@ -447,11 +449,11 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -464,10 +466,10 @@ ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12 ; RV64-BITS-256-NEXT: vand.vi v8, v16, 1 @@ -479,10 +481,10 @@ ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12 ; RV64-BITS-512-NEXT: vand.vi v8, v16, 1 @@ -495,11 +497,11 @@ define @reverse_nxv64i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 @@ -517,10 +519,10 @@ ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-256-NEXT: vrgather.vv v24, v8, v16 ; RV32-BITS-256-NEXT: vand.vi v8, v24, 1 @@ -529,11 +531,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 ; 
RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v16, 0 @@ -548,11 +550,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 @@ -570,10 +572,10 @@ ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16 ; RV64-BITS-256-NEXT: vand.vi v8, v24, 1 @@ -582,11 +584,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v16, 0 @@ -609,11 +611,11 @@ define @reverse_nxv1i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv1i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 3 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -622,11 +624,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv1i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -634,11 +636,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv1i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 3 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -646,11 +648,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: 
reverse_nxv1i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -659,11 +661,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv1i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -671,11 +673,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv1i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 3 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -687,11 +689,11 @@ define @reverse_nxv2i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv2i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -700,11 +702,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv2i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -712,11 +714,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv2i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -724,11 +726,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; 
RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -737,11 +739,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv2i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -749,11 +751,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv2i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -765,11 +767,11 @@ define @reverse_nxv4i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv4i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -778,11 +780,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv4i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -790,11 +792,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv4i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -802,11 +804,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -815,11 +817,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv4i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; 
RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -827,11 +829,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv4i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -915,11 +917,11 @@ define @reverse_nxv16i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv16i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -928,11 +930,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv16i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: vrsub.vx v12, v10, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v12 ; RV32-BITS-256-NEXT: vmv.v.v v8, v10 @@ -940,11 +942,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv16i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: vrsub.vx v12, v10, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v12 ; RV32-BITS-512-NEXT: vmv.v.v v8, v10 @@ -952,11 +954,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -965,11 +967,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv16i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v12 ; 
RV64-BITS-256-NEXT: vmv.v.v v8, v10 @@ -977,11 +979,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv16i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v12 ; RV64-BITS-512-NEXT: vmv.v.v v8, v10 @@ -993,11 +995,11 @@ define @reverse_nxv32i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv32i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -1006,11 +1008,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv32i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: vrsub.vx v16, v12, a0 ; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v16 ; RV32-BITS-256-NEXT: vmv.v.v v8, v12 @@ -1018,11 +1020,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv32i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: vrsub.vx v16, v12, a0 ; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v16 ; RV32-BITS-512-NEXT: vmv.v.v v8, v12 @@ -1030,11 +1032,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv32i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -1043,11 +1045,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv32i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v16 ; RV64-BITS-256-NEXT: vmv.v.v v8, v12 @@ -1055,11 +1057,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv32i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi 
a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v16 ; RV64-BITS-512-NEXT: vmv.v.v v8, v12 @@ -1071,11 +1073,11 @@ define @reverse_nxv64i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 @@ -1085,11 +1087,11 @@ ; ; RV32-BITS-256-LABEL: reverse_nxv64i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 @@ -1097,11 +1099,11 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv64i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v16 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v16 ; RV32-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v24 @@ -1110,11 +1112,11 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 @@ -1124,11 +1126,11 @@ ; ; RV64-BITS-256-LABEL: reverse_nxv64i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-256-NEXT: vmv.v.v v8, v16 @@ -1136,11 +1138,11 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv64i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v24 @@ -1153,11 +1155,11 @@ define @reverse_nxv1i16( %a) { ; 
CHECK-LABEL: reverse_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1169,11 +1171,11 @@ define @reverse_nxv2i16( %a) { ; CHECK-LABEL: reverse_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1185,11 +1187,11 @@ define @reverse_nxv4i16( %a) { ; CHECK-LABEL: reverse_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1216,11 +1218,11 @@ define @reverse_nxv16i16( %a) { ; CHECK-LABEL: reverse_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1232,11 +1234,11 @@ define @reverse_nxv32i16( %a) { ; CHECK-LABEL: reverse_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1248,11 +1250,11 @@ define @reverse_nxv1i32( %a) { ; CHECK-LABEL: reverse_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1264,11 +1266,11 @@ define @reverse_nxv2i32( %a) { ; CHECK-LABEL: reverse_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1280,11 +1282,11 @@ define @reverse_nxv4i32( %a) { ; CHECK-LABEL: reverse_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1311,11 +1313,11 @@ define @reverse_nxv16i32( %a) { ; 
CHECK-LABEL: reverse_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1327,11 +1329,11 @@ define @reverse_nxv1i64( %a) { ; CHECK-LABEL: reverse_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1343,11 +1345,11 @@ define @reverse_nxv2i64( %a) { ; CHECK-LABEL: reverse_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1359,11 +1361,11 @@ define @reverse_nxv4i64( %a) { ; CHECK-LABEL: reverse_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1394,11 +1396,11 @@ define @reverse_nxv1f16( %a) { ; CHECK-LABEL: reverse_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1410,11 +1412,11 @@ define @reverse_nxv2f16( %a) { ; CHECK-LABEL: reverse_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1426,11 +1428,11 @@ define @reverse_nxv4f16( %a) { ; CHECK-LABEL: reverse_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1457,11 +1459,11 @@ define @reverse_nxv16f16( %a) { ; CHECK-LABEL: reverse_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1473,11 +1475,11 @@ define @reverse_nxv32f16( %a) { 
; CHECK-LABEL: reverse_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1489,11 +1491,11 @@ define @reverse_nxv1f32( %a) { ; CHECK-LABEL: reverse_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1505,11 +1507,11 @@ define @reverse_nxv2f32( %a) { ; CHECK-LABEL: reverse_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1521,11 +1523,11 @@ define @reverse_nxv4f32( %a) { ; CHECK-LABEL: reverse_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1552,11 +1554,11 @@ define @reverse_nxv16f32( %a) { ; CHECK-LABEL: reverse_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1568,11 +1570,11 @@ define @reverse_nxv1f64( %a) { ; CHECK-LABEL: reverse_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1584,11 +1586,11 @@ define @reverse_nxv2f64( %a) { ; CHECK-LABEL: reverse_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1600,11 +1602,11 @@ define @reverse_nxv4f64( %a) { ; CHECK-LABEL: reverse_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1633,11 +1635,11 @@ define @reverse_nxv3i64( 
%a) { ; CHECK-LABEL: reverse_nxv3i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v12, v12, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v12 ; CHECK-NEXT: vmv1r.v v8, v17 diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll @@ -15,8 +15,7 @@ ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32 ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gprnox0 = SRLI killed [[SLLI]], 32 ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVFMUL_VV_M1_MASK:%[0-9]+]]:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_MASK [[DEF]], [[COPY3]], [[COPY2]], $v0, 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */ + ; CHECK-NEXT: [[PseudoVFMUL_VV_M1_MASK:%[0-9]+]]:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_MASK $noreg, [[COPY3]], [[COPY2]], $v0, 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */, implicit $frm ; CHECK-NEXT: $v8 = COPY [[PseudoVFMUL_VV_M1_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8 %1 = call fast @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -21,8 +21,8 @@ ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 16 ; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill -; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -37,8 +37,8 @@ ; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -24,8 +24,8 @@ ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 32 ; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill -; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: addi a0, sp, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -40,8 +40,8 @@ ; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb 
; SPILL-O0-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -937,8 +937,8 @@ ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (zero) ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmclr.m v0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu ; CHECK-NEXT: vmv4r.v v20, v16 ; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -234,8 +234,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -250,8 +250,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -500,8 +500,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -516,8 +516,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -783,9 +783,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v8 +; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v10, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -799,9 +799,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v12 +; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v12, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1052,9 +1052,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; 
CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v8 +; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v10, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1068,9 +1068,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v12 +; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v12, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1369,8 +1369,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1385,8 +1385,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1635,8 +1635,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1651,8 +1651,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1919,9 +1919,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v24, v8 +; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1935,9 +1935,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v24 +; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v24, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2188,9 +2188,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v8, 
v0.t -; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v24, v8 +; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2204,9 +2204,9 @@ ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v24 +; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v24, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2239,27 +2239,27 @@ ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul t0, a3, a1 +; CHECK-NEXT: slli t2, a3, 3 ; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v1, v0, a4 ; CHECK-NEXT: srli a1, a3, 3 ; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a1 -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: slli t1, a3, 3 -; CHECK-NEXT: add a7, a2, t1 -; CHECK-NEXT: vl8re64.v v8, (a7) -; CHECK-NEXT: mul t0, a3, a5 +; CHECK-NEXT: add a5, a2, t2 +; CHECK-NEXT: vl8re64.v v8, (a5) +; CHECK-NEXT: slli t3, a3, 4 ; CHECK-NEXT: slli a5, a3, 1 -; CHECK-NEXT: slli t2, a3, 4 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: mv a7, a6 ; CHECK-NEXT: bltu a6, a5, .LBB171_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a7, a5 ; CHECK-NEXT: .LBB171_2: -; CHECK-NEXT: add t0, a2, t0 -; CHECK-NEXT: add t1, a0, t1 -; CHECK-NEXT: add t2, a2, t2 +; CHECK-NEXT: add t1, a2, t0 +; CHECK-NEXT: add t2, a0, t2 +; CHECK-NEXT: add t0, a2, t3 ; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 @@ -2281,22 +2281,22 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB171_4: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v17, v1, a1 -; CHECK-NEXT: vl8re64.v v8, (t0) +; CHECK-NEXT: vl8re64.v v8, (t1) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (t1) +; CHECK-NEXT: vl8re64.v v8, (t2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li t0, 24 -; CHECK-NEXT: mul a2, a2, t0 +; CHECK-NEXT: li t1, 24 +; CHECK-NEXT: mul a2, a2, t1 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (t2) +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v18, v1, a1 +; CHECK-NEXT: vl8re64.v v8, (t0) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 @@ -2318,14 +2318,14 @@ ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v18, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: sub a0, a6, a5 ; CHECK-NEXT: sltu a2, a6, a0 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vslideup.vx v18, v2, a1 +; CHECK-NEXT: vslideup.vx v17, v2, a1 ; 
CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a3, .LBB171_6 ; CHECK-NEXT: # %bb.5: @@ -2343,13 +2343,13 @@ ; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t ; CHECK-NEXT: add a2, a4, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v18, v16, a4 +; CHECK-NEXT: vslideup.vx v17, v16, a4 ; CHECK-NEXT: sub a2, a0, a3 ; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 @@ -2366,8 +2366,8 @@ ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v18, v16, a0 -; CHECK-NEXT: vmv1r.v v0, v18 +; CHECK-NEXT: vslideup.vx v17, v16, a0 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 48 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1164,33 +1164,33 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a0, a1 -; CHECK-NEXT: vl8r.v v8, (a2) -; CHECK-NEXT: sub a2, a3, a1 -; CHECK-NEXT: sltu a4, a3, a2 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: add a4, a0, a1 +; CHECK-NEXT: vl8r.v v8, (a4) +; CHECK-NEXT: vl8r.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: and a2, a4, a2 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vlm.v v0, (a2) +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -256,18 +256,18 @@ ; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv2r.v v16, v10 -; RV32-NEXT: vmv2r.v v12, v8 -; RV32-NEXT: vrgather.vv v8, v12, v20 -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vi v12, v12, 15 +; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vmv2r.v v20, v10 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vrsub.vi v8, 
v8, 15 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV32-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: v16i16_2: @@ -276,18 +276,18 @@ ; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV64-NEXT: li a1, 32 ; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV64-NEXT: vle16.v v20, (a0) -; RV64-NEXT: vmv2r.v v16, v10 -; RV64-NEXT: vmv2r.v v12, v8 -; RV64-NEXT: vrgather.vv v8, v12, v20 -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vi v12, v12, 15 +; RV64-NEXT: vle16.v v16, (a0) +; RV64-NEXT: vmv2r.v v20, v10 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vrsub.vi v8, v8, 15 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV64-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -497,12 +497,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: vmv2r.v v16, v10 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v18 -; RV32-NEXT: vrsub.vi v19, v18, 7 +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vi v11, v10, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v19 +; RV32-NEXT: vrgatherei16.vv v12, v8, v11 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vrsub.vi v8, v10, 3 ; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t @@ -791,12 +791,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: vmv2r.v v16, v10 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v18 -; RV32-NEXT: vrsub.vi v19, v18, 7 +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vi v11, v10, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v19 +; RV32-NEXT: vrgatherei16.vv v12, v8, v11 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vrsub.vi v8, v10, 3 ; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll @@ -36,16 +36,16 @@ ; CHECK-NEXT: vsrl.vx v12, v8, a1 ; CHECK-NEXT: lui a2, 16 ; CHECK-NEXT: addi a2, a2, -256 -; CHECK-NEXT: mv a3, sp -; CHECK-NEXT: vlse64.v v14, (a3), zero ; CHECK-NEXT: vand.vx v12, v12, a2 ; CHECK-NEXT: vor.vv v10, v12, v10 -; CHECK-NEXT: vsrl.vi v12, v8, 8 -; CHECK-NEXT: vand.vv v12, v12, v14 -; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: vlse64.v v14, (a3), zero ; CHECK-NEXT: lui a3, 4080 -; CHECK-NEXT: vand.vx v16, v16, a3 -; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v12, v12, a3 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vand.vv v16, v16, v14 +; CHECK-NEXT: vor.vv v12, v16, v12 ; CHECK-NEXT: vor.vv v10, v12, v10 ; CHECK-NEXT: vand.vv v12, v8, v14 ; CHECK-NEXT: vsll.vi v12, v12, 8 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -10,17 +10,17 @@ ; CHECK-LABEL: vec_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vmv.v.x v11, a1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -30,18 +30,18 @@ ; CHECK-LABEL: vec_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addiw a0, a0, -1 -; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -51,16 +51,16 @@ ; CHECK-LABEL: vec_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,16 +70,16 @@ ; CHECK-LABEL: vec_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 
x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -94,15 +94,15 @@ ; CHECK-LABEL: vec_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vmv.v.x v14, a1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret @@ -114,16 +114,16 @@ ; CHECK-LABEL: vec_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a0, a0, -1 -; CHECK-NEXT: vmv.v.x v14, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret @@ -135,12 +135,12 @@ ; CHECK-LABEL: vec_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a1, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -154,14 +154,14 @@ ; CHECK-LABEL: vec_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.v.x v14, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -562,9 +562,9 @@ ; ; RV64-LABEL: stepvector_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret %v = call @llvm.experimental.stepvector.nxv16i64() @@ -591,11 +591,11 @@ ; ; RV64-LABEL: add_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: @@ -680,15 +680,15 @@ ; ; RV64-LABEL: mul_bigimm_stepvector_nxv16i64: ; RV64: # %bb.0: 
# %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: lui a1, 1987 +; RV64-NEXT: addiw a1, a1, -731 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -683 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vid.v v8 -; RV64-NEXT: lui a0, 1987 -; RV64-NEXT: addiw a0, a0, -731 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -683 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: mul a0, a1, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: @@ -719,11 +719,11 @@ ; ; RV64-LABEL: shl_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -771,17 +771,17 @@ ; CHECK-RV32-NEXT: sltu a5, a3, a2 ; CHECK-RV32-NEXT: addi a5, a5, -1 ; CHECK-RV32-NEXT: and a2, a5, a2 -; CHECK-RV32-NEXT: srli a5, a4, 3 -; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 ; CHECK-RV32-NEXT: .LBB42_2: -; CHECK-RV32-NEXT: mul a4, a3, a1 -; CHECK-RV32-NEXT: add a4, a0, a4 +; CHECK-RV32-NEXT: mul a5, a3, a1 +; CHECK-RV32-NEXT: add a5, a0, a5 +; CHECK-RV32-NEXT: srli a4, a4, 3 +; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a4), a1, v0.t +; CHECK-RV32-NEXT: vlse64.v v16, (a5), a1, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t @@ -795,17 +795,17 @@ ; CHECK-RV64-NEXT: sltu a5, a2, a3 ; CHECK-RV64-NEXT: addi a5, a5, -1 ; CHECK-RV64-NEXT: and a3, a5, a3 -; CHECK-RV64-NEXT: srli a5, a4, 3 -; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-RV64-NEXT: bltu a2, a4, .LBB42_2 ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 ; CHECK-RV64-NEXT: .LBB42_2: -; CHECK-RV64-NEXT: mul a4, a2, a1 -; CHECK-RV64-NEXT: add a4, a0, a4 +; CHECK-RV64-NEXT: mul a5, a2, a1 +; CHECK-RV64-NEXT: add a5, a0, a5 +; CHECK-RV64-NEXT: srli a4, a4, 3 +; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a4), a1, v0.t +; CHECK-RV64-NEXT: vlse64.v v16, (a5), a1, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t @@ -867,46 +867,46 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl, * %hi_ptr) { ; CHECK-RV32-LABEL: strided_load_nxv17f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a5, vlenb -; CHECK-RV32-NEXT: slli a7, a5, 1 +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a7, a2, 1 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: mv a2, a3 +; CHECK-RV32-NEXT: mv a6, a3 ; CHECK-RV32-NEXT: 
bltu a3, a7, .LBB44_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a2, a7 +; CHECK-RV32-NEXT: mv a6, a7 ; CHECK-RV32-NEXT: .LBB44_2: -; CHECK-RV32-NEXT: sub a6, a2, a5 -; CHECK-RV32-NEXT: sltu t0, a2, a6 +; CHECK-RV32-NEXT: sub a5, a6, a2 +; CHECK-RV32-NEXT: sltu t0, a6, a5 ; CHECK-RV32-NEXT: addi t0, t0, -1 -; CHECK-RV32-NEXT: and t0, t0, a6 -; CHECK-RV32-NEXT: srli a6, a5, 3 -; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-RV32-NEXT: mv a6, a2 -; CHECK-RV32-NEXT: bltu a2, a5, .LBB44_4 +; CHECK-RV32-NEXT: and t0, t0, a5 +; CHECK-RV32-NEXT: mv a5, a6 +; CHECK-RV32-NEXT: bltu a6, a2, .LBB44_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a6, a5 +; CHECK-RV32-NEXT: mv a5, a2 ; CHECK-RV32-NEXT: .LBB44_4: -; CHECK-RV32-NEXT: mul t1, a6, a1 +; CHECK-RV32-NEXT: mul t1, a5, a1 ; CHECK-RV32-NEXT: add t1, a0, t1 +; CHECK-RV32-NEXT: srli t2, a2, 3 +; CHECK-RV32-NEXT: vsetvli t3, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, t2 ; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v16, (t1), a1, v0.t ; CHECK-RV32-NEXT: sub a7, a3, a7 ; CHECK-RV32-NEXT: sltu a3, a3, a7 ; CHECK-RV32-NEXT: addi a3, a3, -1 ; CHECK-RV32-NEXT: and a3, a3, a7 -; CHECK-RV32-NEXT: bltu a3, a5, .LBB44_6 +; CHECK-RV32-NEXT: bltu a3, a2, .LBB44_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: mv a3, a5 +; CHECK-RV32-NEXT: mv a3, a2 ; CHECK-RV32-NEXT: .LBB44_6: -; CHECK-RV32-NEXT: srli a5, a5, 2 +; CHECK-RV32-NEXT: mul a6, a6, a1 +; CHECK-RV32-NEXT: add a6, a0, a6 +; CHECK-RV32-NEXT: srli a2, a2, 2 ; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5 -; CHECK-RV32-NEXT: mul a2, a2, a1 -; CHECK-RV32-NEXT: add a2, a0, a2 +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a2 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a2), a1, v0.t -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a6), a1, v0.t +; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV32-NEXT: vs1r.v v24, (a4) @@ -914,46 +914,46 @@ ; ; CHECK-RV64-LABEL: strided_load_nxv17f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: csrr a5, vlenb -; CHECK-RV64-NEXT: slli a7, a5, 1 +; CHECK-RV64-NEXT: csrr a4, vlenb +; CHECK-RV64-NEXT: slli a7, a4, 1 ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: mv a4, a2 +; CHECK-RV64-NEXT: mv a6, a2 ; CHECK-RV64-NEXT: bltu a2, a7, .LBB44_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a4, a7 +; CHECK-RV64-NEXT: mv a6, a7 ; CHECK-RV64-NEXT: .LBB44_2: -; CHECK-RV64-NEXT: sub a6, a4, a5 -; CHECK-RV64-NEXT: sltu t0, a4, a6 +; CHECK-RV64-NEXT: sub a5, a6, a4 +; CHECK-RV64-NEXT: sltu t0, a6, a5 ; CHECK-RV64-NEXT: addi t0, t0, -1 -; CHECK-RV64-NEXT: and t0, t0, a6 -; CHECK-RV64-NEXT: srli a6, a5, 3 -; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-RV64-NEXT: mv a6, a4 -; CHECK-RV64-NEXT: bltu a4, a5, .LBB44_4 +; CHECK-RV64-NEXT: and t0, t0, a5 +; CHECK-RV64-NEXT: mv a5, a6 +; CHECK-RV64-NEXT: bltu a6, a4, .LBB44_4 ; CHECK-RV64-NEXT: # %bb.3: -; CHECK-RV64-NEXT: mv a6, a5 +; CHECK-RV64-NEXT: mv a5, a4 ; CHECK-RV64-NEXT: .LBB44_4: -; CHECK-RV64-NEXT: mul t1, a6, a1 +; CHECK-RV64-NEXT: mul t1, a5, a1 ; CHECK-RV64-NEXT: add t1, a0, t1 +; CHECK-RV64-NEXT: srli t2, a4, 3 +; CHECK-RV64-NEXT: vsetvli t3, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v8, t2 ; 
CHECK-RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v16, (t1), a1, v0.t ; CHECK-RV64-NEXT: sub a7, a2, a7 ; CHECK-RV64-NEXT: sltu a2, a2, a7 ; CHECK-RV64-NEXT: addi a2, a2, -1 ; CHECK-RV64-NEXT: and a2, a2, a7 -; CHECK-RV64-NEXT: bltu a2, a5, .LBB44_6 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB44_6 ; CHECK-RV64-NEXT: # %bb.5: -; CHECK-RV64-NEXT: mv a2, a5 +; CHECK-RV64-NEXT: mv a2, a4 ; CHECK-RV64-NEXT: .LBB44_6: -; CHECK-RV64-NEXT: srli a5, a5, 2 +; CHECK-RV64-NEXT: mul a6, a6, a1 +; CHECK-RV64-NEXT: add a6, a0, a6 +; CHECK-RV64-NEXT: srli a4, a4, 2 ; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5 -; CHECK-RV64-NEXT: mul a4, a4, a1 -; CHECK-RV64-NEXT: add a4, a0, a4 +; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a4 ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a4), a1, v0.t -; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a6), a1, v0.t +; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV64-NEXT: vs1r.v v24, (a3) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -621,46 +621,46 @@ define void @strided_store_nxv16f64( %v, ptr %ptr, i32 signext %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_store_nxv16f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: mv a3, a2 -; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2 +; CHECK-RV32-NEXT: csrr a3, vlenb +; CHECK-RV32-NEXT: mv a4, a2 +; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a3, a4 +; CHECK-RV32-NEXT: mv a4, a3 ; CHECK-RV32-NEXT: .LBB34_2: -; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV32-NEXT: sub a5, a2, a4 +; CHECK-RV32-NEXT: sub a5, a2, a3 ; CHECK-RV32-NEXT: sltu a2, a2, a5 ; CHECK-RV32-NEXT: addi a2, a2, -1 ; CHECK-RV32-NEXT: and a2, a2, a5 -; CHECK-RV32-NEXT: srli a4, a4, 3 -; CHECK-RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-RV32-NEXT: mul a3, a3, a1 -; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: mul a4, a4, a1 +; CHECK-RV32-NEXT: add a0, a0, a4 +; CHECK-RV32-NEXT: srli a3, a3, 3 +; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_store_nxv16f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: mv a3, a2 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2 +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: mv a4, a2 +; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a4 +; CHECK-RV64-NEXT: mv a4, a3 ; CHECK-RV64-NEXT: .LBB34_2: -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV64-NEXT: sub a5, a2, a4 +; CHECK-RV64-NEXT: sub a5, a2, a3 ; CHECK-RV64-NEXT: sltu a2, a2, a5 ; CHECK-RV64-NEXT: addi a2, a2, -1 ; CHECK-RV64-NEXT: and a2, a2, a5 -; CHECK-RV64-NEXT: srli a4, a4, 3 -; CHECK-RV64-NEXT: vsetvli 
a5, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-RV64-NEXT: mul a3, a3, a1 -; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: mul a4, a4, a1 +; CHECK-RV64-NEXT: add a0, a0, a4 +; CHECK-RV64-NEXT: srli a3, a3, 3 +; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret @@ -749,11 +749,11 @@ ; CHECK-RV32-NEXT: sltu t0, a5, a0 ; CHECK-RV32-NEXT: addi t0, t0, -1 ; CHECK-RV32-NEXT: and a0, t0, a0 +; CHECK-RV32-NEXT: mul a7, a7, a2 +; CHECK-RV32-NEXT: add a7, a1, a7 ; CHECK-RV32-NEXT: srli t0, a4, 3 ; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-RV32-NEXT: mul a7, a7, a2 -; CHECK-RV32-NEXT: add a7, a1, a7 ; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV32-NEXT: sub a0, a3, a6 ; CHECK-RV32-NEXT: sltu a3, a3, a0 @@ -764,11 +764,11 @@ ; CHECK-RV32-NEXT: # %bb.5: ; CHECK-RV32-NEXT: mv a0, a4 ; CHECK-RV32-NEXT: .LBB36_6: +; CHECK-RV32-NEXT: mul a3, a5, a2 +; CHECK-RV32-NEXT: add a1, a1, a3 ; CHECK-RV32-NEXT: srli a4, a4, 2 ; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-RV32-NEXT: mul a3, a5, a2 -; CHECK-RV32-NEXT: add a1, a1, a3 ; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -810,11 +810,11 @@ ; CHECK-RV64-NEXT: sltu t0, a5, a0 ; CHECK-RV64-NEXT: addi t0, t0, -1 ; CHECK-RV64-NEXT: and a0, t0, a0 +; CHECK-RV64-NEXT: mul a7, a7, a2 +; CHECK-RV64-NEXT: add a7, a1, a7 ; CHECK-RV64-NEXT: srli t0, a4, 3 ; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-RV64-NEXT: mul a7, a7, a2 -; CHECK-RV64-NEXT: add a7, a1, a7 ; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV64-NEXT: sub a0, a3, a6 ; CHECK-RV64-NEXT: sltu a3, a3, a0 @@ -825,11 +825,11 @@ ; CHECK-RV64-NEXT: # %bb.5: ; CHECK-RV64-NEXT: mv a0, a4 ; CHECK-RV64-NEXT: .LBB36_6: +; CHECK-RV64-NEXT: mul a3, a5, a2 +; CHECK-RV64-NEXT: add a1, a1, a3 ; CHECK-RV64-NEXT: srli a4, a4, 2 ; CHECK-RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-RV64-NEXT: mul a3, a5, a2 -; CHECK-RV64-NEXT: add a1, a1, a3 ; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -107,16 +107,14 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) { ; CHECK-LABEL: vector_deinterleave_v2i64_v4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: vmv.v.v v9, v10 +; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: 
vrgather.vi v9, v10, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec) ret {<2 x i64>, <2 x i64>} %retval @@ -196,16 +194,14 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) { ; CHECK-LABEL: vector_deinterleave_v2f64_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: vmv.v.v v9, v10 +; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec) ret {<2 x double>, <2 x double>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -104,51 +104,74 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: li a2, 40 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v16, (a1) -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v16, v0 +; CHECK-NEXT: vl8re64.v v24, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v16, v8, v8 +; CHECK-NEXT: vrgather.vv v8, v0, v16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: vadd.vi v0, v0, 1 -; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: vrgather.vv v0, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vadd.vi v24, v16, 1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size 
Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v16, v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v24, v0, v8 +; CHECK-NEXT: vmv4r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vmv4r.v v20, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -7,16 +7,15 @@ define {, } @vector_deinterleave_nxv16i1_nxv32i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 @@ -91,39 +90,24 @@ define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v28, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 +; CHECK-NEXT: vnsrl.wi v24, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi 
v8, v24, 0 +; CHECK-NEXT: vnsrl.wi v28, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v4, v16, 8 -; CHECK-NEXT: vnsrl.wi v0, v24, 8 +; CHECK-NEXT: vnsrl.wi v24, v16, 8 +; CHECK-NEXT: vnsrl.wi v28, v8, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v0, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmsne.vi v8, v24, 0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval @@ -134,10 +118,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 8 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 8 +; CHECK-NEXT: vnsrl.wi v4, v16, 8 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i8( %vec) @@ -149,10 +133,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64i16( %vec) @@ -167,8 +151,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32i32( %vec) @@ -355,10 +339,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64f16( %vec) @@ -373,8 +357,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32f32( %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -7,13 +7,12 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) { ; CHECK-LABEL: vector_interleave_v32i1_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi 
v0, v8, 2 +; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -106,13 +106,13 @@ ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; CHECK-NEXT: vid.v v24 -; CHECK-NEXT: vsrl.vi v2, v24, 1 -; CHECK-NEXT: vand.vi v24, v24, 1 -; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, mu +; CHECK-NEXT: vid.v v24 +; CHECK-NEXT: vand.vi v26, v24, 1 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsrl.vi v2, v24, 1 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a3, a3, 3 ; CHECK-NEXT: add a3, sp, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -71,13 +71,13 @@ define @vector_interleave_nxv4i64_nxv2i64( %a, %b) { ; CHECK-LABEL: vector_interleave_nxv4i64_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 +; CHECK-NEXT: vsrl.vi v16, v12, 1 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -177,13 +177,13 @@ ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv8r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vid.v v24 -; CHECK-NEXT: vsrl.vi v8, v24, 1 -; CHECK-NEXT: vand.vi v24, v24, 1 -; CHECK-NEXT: vmsne.vi v10, v24, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vid.v v24 +; CHECK-NEXT: vand.vi v26, v24, 1 +; CHECK-NEXT: vmsne.vi v10, v26, 0 +; CHECK-NEXT: vsrl.vi v8, v24, 1 ; CHECK-NEXT: vmv8r.v v24, v0 ; CHECK-NEXT: vmv4r.v v12, v4 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -289,13 +289,13 @@ define @vector_interleave_nxv4f64_nxv2f64( %a, %b) { ; CHECK-LABEL: vector_interleave_nxv4f64_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 +; CHECK-NEXT: vsrl.vi v16, v12, 1 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ 
-355,13 +355,13 @@ ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv8r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vid.v v24 -; CHECK-NEXT: vsrl.vi v8, v24, 1 -; CHECK-NEXT: vand.vi v24, v24, 1 -; CHECK-NEXT: vmsne.vi v10, v24, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vid.v v24 +; CHECK-NEXT: vand.vi v26, v24, 1 +; CHECK-NEXT: vmsne.vi v10, v26, 0 +; CHECK-NEXT: vsrl.vi v8, v24, 1 ; CHECK-NEXT: vmv8r.v v24, v0 ; CHECK-NEXT: vmv4r.v v12, v4 ; CHECK-NEXT: vmv1r.v v0, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -152,39 +152,39 @@ ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v0, a3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: slli a3, a1, 1 -; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a5, a2, a4 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v8, (a4) +; CHECK-NEXT: slli a4, a1, 1 +; CHECK-NEXT: sub a5, a2, a4 +; CHECK-NEXT: sltu a6, a2, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, a5 -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: sub a6, a5, a1 +; CHECK-NEXT: sltu a7, a5, a6 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a6, a7, a6 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vslidedown.vx v0, v16, a3 ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a4, a1, .LBB8_2 +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t +; CHECK-NEXT: bltu a5, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a1 +; CHECK-NEXT: mv a5, a1 ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v1, a5 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v2, v1, a3 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB8_4 +; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: bltu a2, a4, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a2, a4 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll --- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll @@ -14,8 +14,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY 
$x10 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[DEF]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) + ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -283,60 +283,59 @@ ; ; RV64-LABEL: vpgather_baseidx_nxv32i8: ; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a4, a2, 1 -; RV64-NEXT: sub a3, a1, a4 -; RV64-NEXT: sltu a5, a1, a3 +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: sub a4, a1, a3 +; RV64-NEXT: sltu a5, a1, a4 ; RV64-NEXT: addi a5, a5, -1 -; RV64-NEXT: and a3, a5, a3 -; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: mv a5, a3 -; RV64-NEXT: bltu a3, a2, .LBB12_2 +; RV64-NEXT: and a5, a5, a4 +; RV64-NEXT: sub a4, a5, a2 +; RV64-NEXT: sltu a6, a5, a4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: and a6, a6, a4 +; RV64-NEXT: srli a4, a2, 2 +; RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v13, v0, a4 +; RV64-NEXT: srli a4, a2, 3 +; RV64-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v13, a4 +; RV64-NEXT: vsetvli a7, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v11 +; RV64-NEXT: vsetvli zero, a6, e8, m1, ta, ma +; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t +; RV64-NEXT: bltu a5, a2, .LBB12_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a5, a2 ; RV64-NEXT: .LBB12_2: -; RV64-NEXT: srli a6, a2, 2 -; RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v12, v13, a6 ; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a4, .LBB12_4 +; RV64-NEXT: bltu a1, a3, .LBB12_4 ; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a1, a4 +; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB12_4: -; RV64-NEXT: sub a4, a1, a2 -; RV64-NEXT: sltu a5, a1, a4 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a5, a1, a3 ; RV64-NEXT: addi a5, a5, -1 -; RV64-NEXT: and a5, a5, a4 -; RV64-NEXT: srli a4, a2, 3 -; RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v13, a4 -; RV64-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV64-NEXT: and a3, a5, a3 +; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v12, a4 +; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v9 -; RV64-NEXT: vsetvli zero, a5, e8, m1, ta, ma +; RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v9, (a0), v16, v0.t ; RV64-NEXT: bltu a1, a2, .LBB12_6 ; RV64-NEXT: # %bb.5: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB12_6: -; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: sub a1, a3, a2 -; 
RV64-NEXT: sltu a2, a3, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v12, a4 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v11 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs %v = call @llvm.vp.gather.nxv32i8.nxv32p0( %ptrs, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -459,11 +459,11 @@ ; CHECK-NEXT: sltu a4, a1, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: srli a4, a2, 3 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a4 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: srli a5, a2, 3 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: bltu a1, a2, .LBB37_2 @@ -504,11 +504,11 @@ ; CHECK-NEXT: sltu a7, a4, a6 ; CHECK-NEXT: addi a7, a7, -1 ; CHECK-NEXT: and a6, a7, a6 -; CHECK-NEXT: srli a7, a3, 3 -; CHECK-NEXT: vsetvli t0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v8, a7 ; CHECK-NEXT: slli a7, a3, 3 ; CHECK-NEXT: add a7, a0, a7 +; CHECK-NEXT: srli t0, a3, 3 +; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, t0 ; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a7), v0.t ; CHECK-NEXT: sub a5, a2, a5 @@ -519,11 +519,11 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB38_4: -; CHECK-NEXT: srli a5, a3, 2 -; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v8, a5 ; CHECK-NEXT: slli a5, a3, 4 ; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: srli a6, a3, 2 +; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a4, a3, .LBB38_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -380,18 +380,18 @@ ; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a0, a1 -; CHECK-NEXT: vl8r.v v16, (a2) -; CHECK-NEXT: sub a2, a3, a1 -; CHECK-NEXT: sltu a4, a3, a2 -; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: add a4, a0, a1 +; CHECK-NEXT: vl8r.v v16, (a4) ; CHECK-NEXT: vl8r.v v8, (a0) -; CHECK-NEXT: and a2, a4, a2 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vlm.v v0, (a2) +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma ; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: bltu a3, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -381,11 +381,11 @@ ; CHECK-NEXT: sltu a1, a1, a3 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a3 -; CHECK-NEXT: srli a3, a2, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: srli a2, a2, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -429,11 +429,11 @@ ; CHECK-NEXT: sltu a5, a5, a0 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a0, a5, a0 -; CHECK-NEXT: srli a5, a3, 3 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a5 ; CHECK-NEXT: slli a5, a3, 3 ; CHECK-NEXT: add a5, a1, a5 +; CHECK-NEXT: srli a6, a3, 3 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a6 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: sub a0, a2, a4 ; CHECK-NEXT: sltu a2, a2, a0 @@ -444,11 +444,11 @@ ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: .LBB31_6: -; CHECK-NEXT: srli a2, a3, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a2 -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: slli a2, a3, 4 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a3 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -886,14 +886,14 @@ define half @vreduce_ord_fadd_nxv3f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv3f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a2 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -910,16 +910,16 @@ define half @vreduce_ord_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv6f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 +; CHECK-NEXT: vslideup.vx v9, v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -937,12 +937,13 @@ ; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vslideup.vx v10, 
v12, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.v v11, v12 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v12 @@ -957,10 +958,10 @@ define half @vreduce_ord_fadd_nxv12f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv12f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -973,14 +974,14 @@ define half @vreduce_fadd_nxv3f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv3f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a2 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -995,16 +996,16 @@ define half @vreduce_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv6f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 +; CHECK-NEXT: vslideup.vx v9, v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1017,18 +1018,19 @@ define half @vreduce_fmin_nxv10f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v12, (a0), zero ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: lui a1, %hi(.LCPI73_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v12, (a1), zero ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.v v11, v12 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -93,25 +93,24 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv64f16: ; CHECK: 
# %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB6_2 +; CHECK-NEXT: vslidedown.vx v24, v0, a1 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: bltu a0, a2, .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 @@ -123,25 +122,24 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: vslidedown.vx v24, v0, a1 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1153,25 +1153,24 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; RV32-LABEL: vpreduce_umax_nxv32i32: ; RV32: # %bb.0: -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a3, a2, 2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: srli a2, a3, 2 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a3 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: vmv.s.x v25, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bltu a1, a2, .LBB67_2 +; RV32-NEXT: vslidedown.vx v24, v0, a2 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: sub a2, a1, a3 +; 
RV32-NEXT: sltu a4, a1, a2 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: bltu a1, a3, .LBB67_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v25, a0 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV32-NEXT: sub a0, a1, a2 -; RV32-NEXT: sltu a1, a1, a0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t ; RV32-NEXT: vmv.x.s a0, v25 @@ -1179,26 +1178,25 @@ ; ; RV64-LABEL: vpreduce_umax_nxv32i32: ; RV64: # %bb.0: -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: srli a3, a2, 2 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a3 -; RV64-NEXT: slli a3, a0, 32 -; RV64-NEXT: slli a0, a2, 1 -; RV64-NEXT: srli a3, a3, 32 -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a0, .LBB67_2 +; RV64-NEXT: vslidedown.vx v24, v0, a2 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a2, a0, 32 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: sub a0, a1, a3 +; RV64-NEXT: sltu a4, a1, a0 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a0, a4, a0 +; RV64-NEXT: bltu a1, a3, .LBB67_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a0 +; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v25, a3 -; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64-NEXT: vmv.s.x v25, a2 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: sltu a1, a1, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -1230,11 +1230,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmaxu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_nxv1i64: @@ -1254,11 +1254,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_nxv1i64: @@ -1278,11 +1278,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; 
RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vreduce_umin_nxv1i64:
@@ -1302,11 +1302,11 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; RV32-NEXT: vredmin.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: li a1, 32
+; RV32-NEXT: li a0, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v8, a1
-; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: vsrl.vx v9, v8, a0
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vreduce_smin_nxv1i64:
@@ -1326,11 +1326,11 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: li a1, 32
+; RV32-NEXT: li a0, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v8, a1
-; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: vsrl.vx v9, v8, a0
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vreduce_and_nxv1i64:
@@ -1350,11 +1350,11 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; RV32-NEXT: vredor.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: li a1, 32
+; RV32-NEXT: li a0, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v8, a1
-; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: vsrl.vx v9, v8, a0
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vreduce_or_nxv1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
@@ -1664,16 +1664,16 @@
 define <vscale x 1 x i64> @vror_vi_nxv1i64(<vscale x 1 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_nxv1i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v9, 1
+; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v9, a0
-; CHECK-RV32-NEXT: vand.vi v9, v9, 1
-; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9
-; CHECK-RV32-NEXT: vmv.v.i v10, 1
-; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0
-; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v9, v8
+; CHECK-RV32-NEXT: vand.vx v9, v9, a0
+; CHECK-RV32-NEXT: vsll.vv v9, v8, v9
+; CHECK-RV32-NEXT: vmv.v.x v10, a0
+; CHECK-RV32-NEXT: vand.vi v10, v10, 1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
+; CHECK-RV32-NEXT: vor.vv v8, v8, v9
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_nxv1i64:
@@ -1697,16 +1697,16 @@
 define <vscale x 1 x i64> @vror_vi_rotl_nxv1i64(<vscale x 1 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_nxv1i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v9, 1
+; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v9, a0
-; CHECK-RV32-NEXT: vand.vi v9, v9, 1
-; CHECK-RV32-NEXT: vsll.vv v9, v8, v9
-; CHECK-RV32-NEXT: vmv.v.i v10, 1
-; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0
-; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v9, v8
+; CHECK-RV32-NEXT: vand.vx v9, v9, a0
+; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9
+; CHECK-RV32-NEXT: vmv.v.x v10, a0
+; CHECK-RV32-NEXT: vand.vi v10, v10, 1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
+; CHECK-RV32-NEXT: vor.vv v8, v8, v9
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_rotl_nxv1i64:
@@ -1810,16 +1810,16 @@
 define <vscale x 2 x i64> @vror_vi_nxv2i64(<vscale x 2 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_nxv2i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v10, 1
+; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v10, a0
-; CHECK-RV32-NEXT: vand.vi v10, v10, 1
-; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
-; CHECK-RV32-NEXT: vmv.v.i v12, 1
-; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v10, v8
+; CHECK-RV32-NEXT: vand.vx v10, v10, a0
+; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: vand.vi v12, v12, 1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_nxv2i64:
@@ -1843,16 +1843,16 @@
 define <vscale x 2 x i64> @vror_vi_rotl_nxv2i64(<vscale x 2 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_nxv2i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v10, 1
+; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v10, a0
-; CHECK-RV32-NEXT: vand.vi v10, v10, 1
-; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
-; CHECK-RV32-NEXT: vmv.v.i v12, 1
-; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v10, v8
+; CHECK-RV32-NEXT: vand.vx v10, v10, a0
+; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: vand.vi v12, v12, 1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_rotl_nxv2i64:
@@ -1956,16 +1956,16 @@
 define <vscale x 4 x i64> @vror_vi_nxv4i64(<vscale x 4 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_nxv4i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v12, 1
+; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v12, a0
-; CHECK-RV32-NEXT: vand.vi v12, v12, 1
-; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
-; CHECK-RV32-NEXT: vmv.v.i v16, 1
-; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0
-; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v16
-; CHECK-RV32-NEXT: vor.vv v8, v12, v8
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
+; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v16, a0
+; CHECK-RV32-NEXT: vand.vi v16, v16, 1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v12
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_nxv4i64:
@@ -1989,16 +1989,16 @@
 define <vscale x 4 x i64> @vror_vi_rotl_nxv4i64(<vscale x 4 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_nxv4i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v12, 1
+; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v12, a0
-; CHECK-RV32-NEXT: vand.vi v12, v12, 1
-; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
-; CHECK-RV32-NEXT: vmv.v.i v16, 1
-; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0
-; CHECK-RV32-NEXT: vand.vx v16, v16, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
-; CHECK-RV32-NEXT: vor.vv v8, v12, v8
+; CHECK-RV32-NEXT: vand.vx v12, v12, a0
+; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v16, a0
+; CHECK-RV32-NEXT: vand.vi v16, v16, 1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v12
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_rotl_nxv4i64:
@@ -2102,16 +2102,16 @@
 define <vscale x 8 x i64> @vror_vi_nxv8i64(<vscale x 8 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_nxv8i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v16, 1
+; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v16, a0
-; CHECK-RV32-NEXT: vand.vi v16, v16, 1
-; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16
-; CHECK-RV32-NEXT: vmv.v.i v24, 1
-; CHECK-RV32-NEXT: vrsub.vi v24, v24, 0
-; CHECK-RV32-NEXT: vand.vx v24, v24, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v24
-; CHECK-RV32-NEXT: vor.vv v8, v16, v8
+; CHECK-RV32-NEXT: vand.vx v16, v16, a0
+; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v24, a0
+; CHECK-RV32-NEXT: vand.vi v24, v24, 1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v24
+; CHECK-RV32-NEXT: vor.vv v8, v8, v16
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_nxv8i64:
@@ -2135,16 +2135,16 @@
 define <vscale x 8 x i64> @vror_vi_rotl_nxv8i64(<vscale x 8 x i64> %a) {
 ; CHECK-RV32-LABEL: vror_vi_rotl_nxv8i64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vmv.v.i v16, 1
+; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0
 ; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vmv.v.x v16, a0
-; CHECK-RV32-NEXT: vand.vi v16, v16, 1
-; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
-; CHECK-RV32-NEXT: vmv.v.i v24, 1
-; CHECK-RV32-NEXT: vrsub.vi v24, v24, 0
-; CHECK-RV32-NEXT: vand.vx v24, v24, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v24
-; CHECK-RV32-NEXT: vor.vv v8, v16, v8
+; CHECK-RV32-NEXT: vand.vx v16, v16, a0
+; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v24, a0
+; CHECK-RV32-NEXT: vand.vi v24, v24, 1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v24
+; CHECK-RV32-NEXT: vor.vv v8, v8, v16
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vi_rotl_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -297,39 +297,39 @@
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v16, v0, a3
-; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: slli a3, a1, 3
-; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vl8re64.v v24, (a3)
-; CHECK-NEXT: slli a3, a1, 1
-; CHECK-NEXT: sub a4, a2, a3
-; CHECK-NEXT: sltu a5, a2, a4
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a4, a5, a4
-; CHECK-NEXT: sub a5, a4, a1
-; CHECK-NEXT: sltu a6, a4, a5
+; CHECK-NEXT: srli a3, a1, 3
+; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v0, a4
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: add a4, a0, a4
+; CHECK-NEXT: vl8re64.v v8, (a4)
+; CHECK-NEXT: slli a4, a1, 1
+; CHECK-NEXT: sub a5, a2, a4
+; CHECK-NEXT: sltu a6, a2, a5
 ; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a6, a6, a5
-; CHECK-NEXT: srli a5, a1, 3
-; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: vslidedown.vx v0, v16, a5
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: sub a6, a5, a1
+; CHECK-NEXT: sltu a7, a5, a6
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a6, a7, a6
+; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vslidedown.vx v0, v16, a3
 ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t
-; CHECK-NEXT: bltu a4, a1, .LBB17_2
+; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT: bltu a5, a1, .LBB17_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a1
+; CHECK-NEXT: mv a5, a1
 ; CHECK-NEXT: .LBB17_2:
 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v2, v1, a5
-; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vslidedown.vx v2, v1, a3
+; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: bltu a2, a3, .LBB17_4
+; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; CHECK-NEXT: bltu a2, a4, .LBB17_4
 ; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: mv a2, a4
 ; CHECK-NEXT: .LBB17_4:
 ; CHECK-NEXT: sub a0, a2, a1
 ; CHECK-NEXT: sltu a3, a2, a0
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -722,21 +722,21 @@
 ;
 ; RV64MV-LABEL: test_srem_vec:
 ; RV64MV: # %bb.0:
-; RV64MV-NEXT: lbu a1, 12(a0)
+; RV64MV-NEXT: ld a1, 0(a0)
 ; RV64MV-NEXT: lwu a2, 8(a0)
-; RV64MV-NEXT: slli a1, a1, 32
-; RV64MV-NEXT: ld a3, 0(a0)
-; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: slli a1, a1, 29
-; RV64MV-NEXT: srai a1, a1, 31
-; RV64MV-NEXT: srli a4, a3, 2
-; RV64MV-NEXT: slli a2, a2, 62
+; RV64MV-NEXT: srli a3, a1, 2
+; RV64MV-NEXT: lbu a4, 12(a0)
+; RV64MV-NEXT: slli a5, a2, 62
+; RV64MV-NEXT: or a3, a5, a3
+; RV64MV-NEXT: srai a3, a3, 31
+; RV64MV-NEXT: slli a4, a4, 32
 ; RV64MV-NEXT: or a2, a2, a4
+; RV64MV-NEXT: slli a2, a2, 29
 ; RV64MV-NEXT: lui a4, %hi(.LCPI3_0)
 ; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4)
 ; RV64MV-NEXT: srai a2, a2, 31
-; RV64MV-NEXT: slli a3, a3, 31
-; RV64MV-NEXT: srai a3, a3, 31
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srai a1, a1, 31
 ; RV64MV-NEXT: mulh a4, a2, a4
 ; RV64MV-NEXT: srli a5, a4, 63
 ; RV64MV-NEXT: srai a4, a4, 1
@@ -744,27 +744,27 @@
 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1)
 ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5)
 ; RV64MV-NEXT: add a2, a2, a4
-; RV64MV-NEXT: slli a4, a4, 3
-; RV64MV-NEXT: sub a2, a2, a4
+; RV64MV-NEXT: slli a4, a4, 2
+; RV64MV-NEXT: add a2, a2, a4
 ; RV64MV-NEXT: mulh a4, a3, a5
 ; RV64MV-NEXT: srli a5, a4, 63
+; RV64MV-NEXT: srai a4, a4, 1
+; RV64MV-NEXT: add a4, a4, a5
+; RV64MV-NEXT: lui a5, %hi(.LCPI3_2)
+; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5)
+; RV64MV-NEXT: add a3, a3, a4
+; RV64MV-NEXT: slli a4, a4, 3
+; RV64MV-NEXT: sub a3, a3, a4
+; RV64MV-NEXT: mulh a4, a1, a5
+; RV64MV-NEXT: srli a5, a4, 63
 ; RV64MV-NEXT: add a4, a4, a5
 ; RV64MV-NEXT: li a5, 6
 ; RV64MV-NEXT: mul a4, a4, a5
-; RV64MV-NEXT: sub a3, a3, a4
-; RV64MV-NEXT: lui a4, %hi(.LCPI3_2)
-; RV64MV-NEXT: ld a4, %lo(.LCPI3_2)(a4)
+; RV64MV-NEXT: sub a1, a1, a4
 ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
 ; RV64MV-NEXT: vslide1down.vx v8, v8, a3
 ; RV64MV-NEXT: vslide1down.vx v8, v8, a2
-; RV64MV-NEXT: mulh a2, a1, a4
-; RV64MV-NEXT: srli a3, a2, 63
-; RV64MV-NEXT: srai a2, a2, 1
-; RV64MV-NEXT: add a2, a2, a3
-; RV64MV-NEXT: slli a3, a2, 2
-; RV64MV-NEXT: add a1, a1, a2
-; RV64MV-NEXT: add a1, a1, a3
-; RV64MV-NEXT: vslide1down.vx v8, v8, a1
 ; RV64MV-NEXT: vslidedown.vi v8, v8, 1
 ; RV64MV-NEXT: lui a1, %hi(.LCPI3_3)
 ; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3)