diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -117,7 +117,8 @@
   OPERAND_SIMM12,
   OPERAND_UIMM20,
   OPERAND_UIMMLOG2XLEN,
-  OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN
+  OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN,
+  OPERAND_AVL,
 };
 } // namespace RISCVOp
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1302,17 +1302,13 @@
   return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
 }
 
-// X0 has special meaning for vsetvl/vsetvli.
-//  rd | rs1 |   AVL value | Effect on vl
-//--------------------------------------------------------------
-// !X0 |  X0 |       VLMAX | Set vl to VLMAX
-//  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+// Select VL as a 5 bit immediate or a value that will become a register. This
+// allows us to choose between VSETIVLI or VSETVLI later.
 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
-  // If the VL value is a constant 0, manually select it to an ADDI with 0
-  // immediate to prevent the default selection path from matching it to X0.
   auto *C = dyn_cast<ConstantSDNode>(N);
-  if (C && C->isNullValue())
-    VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+  if (C && isUInt<5>(C->getZExtValue()))
+    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
+                                   N->getValueType(0));
   else
     VL = N;
 
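(Annotation, not part of the patch.) The selectVLOp change above is the core of the patch: the ComplexPattern now produces either a target constant, when the AVL is a compile-time value that fits VSETIVLI's unsigned 5-bit immediate, or the unchanged value that will be selected into a register, deferring the VSETIVLI/VSETVLI decision to the custom inserter changed below. A minimal standalone sketch of the predicate involved, assuming nothing beyond the 5-bit zimm encoding of vsetivli (LLVM itself spells the check isUInt<5>):

    #include <cstdint>

    // vsetivli encodes AVL in a 5-bit zero-extended immediate (zimm[4:0]),
    // so only constants 0..31 qualify; anything else must stay in a register
    // and be consumed by vsetvli instead.
    bool fitsVsetivliZimm(uint64_t AVL) {
      return AVL < 32; // equivalent to isUInt<5>(AVL)
    }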
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6035,24 +6035,15 @@
   auto BuildVSETVLI = [&]() {
     if (VLIndex >= 0) {
       Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-      Register VLReg = MI.getOperand(VLIndex).getReg();
+      const MachineOperand &VLOp = MI.getOperand(VLIndex);
 
-      // VL might be a compile time constant, but isel would have to put it
-      // in a register. See if VL comes from an ADDI X0, imm.
-      if (VLReg.isVirtual()) {
-        MachineInstr *Def = MRI.getVRegDef(VLReg);
-        if (Def && Def->getOpcode() == RISCV::ADDI &&
-            Def->getOperand(1).getReg() == RISCV::X0 &&
-            Def->getOperand(2).isImm()) {
-          uint64_t Imm = Def->getOperand(2).getImm();
-          // VSETIVLI allows a 5-bit zero extended immediate.
-          if (isUInt<5>(Imm))
-            return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
-                .addReg(DestReg, RegState::Define | RegState::Dead)
-                .addImm(Imm);
-        }
-      }
+      // VL can be a register or an immediate.
+      if (VLOp.isImm())
+        return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
+            .addReg(DestReg, RegState::Define | RegState::Dead)
+            .addImm(VLOp.getImm());
 
+      Register VLReg = MI.getOperand(VLIndex).getReg();
       return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
           .addReg(DestReg, RegState::Define | RegState::Dead)
           .addReg(VLReg);
@@ -6092,7 +6083,7 @@
                        /*MaskAgnostic*/ false));
 
   // Remove (now) redundant operands from pseudo
-  if (VLIndex >= 0) {
+  if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) {
     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
     MI.getOperand(VLIndex).setIsKill(false);
   }
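(Annotation, not part of the patch.) With isel now emitting the AVL operand as either an immediate or a register, BuildVSETVLI above reduces to a two-way dispatch on MachineOperand::isImm(), and the operand cleanup later in the function must first check isReg() before writing NoRegister into it. A rough sketch of the dispatch shape, using simplified stand-in types rather than the real MachineInstr/MachineOperand API:

    #include <cstdint>

    enum class VSetOpcode { PseudoVSETIVLI, PseudoVSETVLI };

    // Stand-in for the AVL machine operand; isel guarantees that an
    // immediate here is already a valid uimm5.
    struct AVLOperand {
      bool IsImm;
      uint64_t Imm;  // valid when IsImm
      unsigned Reg;  // valid when !IsImm
    };

    // Mirrors BuildVSETVLI: immediate AVL -> vsetivli, register AVL -> vsetvli.
    VSetOpcode chooseVSet(const AVLOperand &AVL) {
      return AVL.IsImm ? VSetOpcode::PseudoVSETIVLI : VSetOpcode::PseudoVSETVLI;
    }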
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -20,6 +20,14 @@
 def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
                               SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
 
+// Operand that is allowed to be a register or a 5 bit immediate.
+// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
+// pseudo instructions.
+def AVL : RegisterOperand<GPR> {
+  let OperandNamespace = "RISCVOp";
+  let OperandType = "OPERAND_AVL";
+}
+
 // X0 has special meaning for vsetvl/vsetvli.
 //  rd | rs1 |   AVL value | Effect on vl
 //--------------------------------------------------------------
@@ -115,7 +123,7 @@
                                        "NoDef")))))));
 }
 
-def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT GPR:$vl)))>;
+def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
 
 // Output pattern for X0 used to represent VLMAX in the pseudo instructions.
 def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
@@ -603,7 +611,7 @@
 
 class VPseudoUSLoadNoMask<VReg RetClass, bits<7> EEW, bit isFF> :
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLE {
 let mayLoad = 1;
@@ -621,7 +629,7 @@
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge,
                   GPR:$rs1,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLE {
 let mayLoad = 1;
@@ -638,7 +646,7 @@
 
 class VPseudoSLoadNoMask<VReg RetClass, bits<7> EEW>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
      RISCVVLE {
 let mayLoad = 1;
@@ -656,7 +664,7 @@
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge,
                   GPR:$rs1, GPR:$rs2,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLE {
 let mayLoad = 1;
@@ -674,7 +682,7 @@
 class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
                          bit Ordered, bit EarlyClobber>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLX {
 let mayLoad = 1;
@@ -694,7 +702,7 @@
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge,
                   GPR:$rs1, IdxClass:$rs2,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLX {
 let mayLoad = 1;
@@ -711,7 +719,7 @@
 
 class VPseudoUSStoreNoMask<VReg StClass, bits<7> EEW>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSE {
 let mayLoad = 0;
@@ -727,7 +735,7 @@
 
 class VPseudoUSStoreMask<VReg StClass, bits<7> EEW>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSE {
 let mayLoad = 0;
@@ -742,7 +750,7 @@
 
 class VPseudoSStoreNoMask<VReg StClass, bits<7> EEW>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSE {
 let mayLoad = 0;
@@ -758,7 +766,7 @@
 
 class VPseudoSStoreMask<VReg StClass, bits<7> EEW>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSE {
 let mayLoad = 0;
@@ -775,7 +783,7 @@
 class VPseudoUnaryNoDummyMask<VReg RetClass,
                               DAGOperand Op2Class> :
       Pseudo<(outs RetClass:$rd),
-             (ins Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
+             (ins Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -789,7 +797,7 @@
 
 class VPseudoNullaryNoMask<RegisterClass RegClass>:
       Pseudo<(outs RegClass:$rd),
-            (ins GPR:$vl, ixlenimm:$sew),
+            (ins AVL:$vl, ixlenimm:$sew),
             []>, RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -804,7 +812,7 @@
 
 class VPseudoNullaryMask<RegisterClass RegClass>:
       Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
-            (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, GPR:$vl,
+            (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl,
                  ixlenimm:$sew), []>, RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -821,7 +829,7 @@
 // Nullary for pseudo instructions. They are expanded in
 // RISCVExpandPseudoInsts pass.
 class VPseudoNullaryPseudoM<string BaseInst>
-   : Pseudo<(outs VR:$rd), (ins GPR:$vl, ixlenimm:$sew), []>,
+   : Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
   RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -838,7 +846,7 @@
 // RetClass could be GPR or VReg.
 class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
       Pseudo<(outs RetClass:$rd),
-             (ins OpClass:$rs2, GPR:$vl, ixlenimm:$sew), []>,
+             (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -855,7 +863,7 @@
 class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
@@ -872,7 +880,7 @@
 // mask unary operation without maskedoff
 class VPseudoMaskUnarySOutMask:
       Pseudo<(outs GPR:$rd),
-             (ins VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+             (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -890,7 +898,7 @@
   Pseudo<(outs RetClass:$rd),
          (ins RetClass:$merge,
               Op1Class:$rs2,
-              VR:$vm, GPR:$vl, ixlenimm:$sew),
+              VR:$vm, AVL:$vl, ixlenimm:$sew),
          []>,
   RISCVVPseudo {
 let mayLoad = 0;
@@ -910,7 +918,7 @@
                           DAGOperand Op2Class,
                           string Constraint> :
       Pseudo<(outs RetClass:$rd),
-             (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
+             (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -927,7 +935,7 @@
 class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
                           bit Ordered>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSX {
 let mayLoad = 0;
@@ -944,7 +952,7 @@
 class VPseudoIStoreMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
                         bit Ordered>:
       Pseudo<(outs),
-             (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSX {
 let mayLoad = 0;
@@ -964,7 +972,7 @@
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge,
                   Op1Class:$rs2, Op2Class:$rs1,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -986,7 +994,7 @@
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$merge,
                   Op1Class:$rs2, Op2Class:$rs1,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -1008,9 +1016,9 @@
                           string Constraint> :
   Pseudo<(outs RetClass:$rd),
          !if(CarryIn,
-                (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, GPR:$vl,
+                (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
                      ixlenimm:$sew),
-                (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew)), []>,
+                (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
   RISCVVPseudo {
 let mayLoad = 0;
 let mayStore = 0;
@@ -1031,7 +1039,7 @@
                           string Constraint> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
-                  GPR:$vl, ixlenimm:$sew),
+                  AVL:$vl, ixlenimm:$sew),
              []>,
       RISCVVPseudo {
 let mayLoad = 0;
@@ -1053,7 +1061,7 @@
              (ins GPR:$rs1,
                   Op1Class:$vs2,
                   GetVRegNoV0<RetClass>.R:$vd,
-                  GPR:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 1;
 let mayStore = 1;
@@ -1073,7 +1081,7 @@
              (ins GPR:$rs1,
                   Op1Class:$vs2,
                   GetVRegNoV0<RetClass>.R:$vd,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
       RISCVVPseudo {
 let mayLoad = 1;
 let mayStore = 1;
@@ -1115,7 +1123,7 @@
 
 class VPseudoUSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLSEG {
 let mayLoad = 1;
@@ -1132,7 +1140,7 @@
 class VPseudoUSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLSEG {
 let mayLoad = 1;
@@ -1149,7 +1157,7 @@
 
 class VPseudoSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, GPR:$offset, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLSEG {
 let mayLoad = 1;
@@ -1167,7 +1175,7 @@
 class VPseudoSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
-                  GPR:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLSEG {
 let mayLoad = 1;
@@ -1185,7 +1193,7 @@
 class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
                             bits<4> NF, bit Ordered>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, IdxClass:$offset, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLXSEG {
 let mayLoad = 1;
@@ -1206,7 +1214,7 @@
                             bits<4> NF, bit Ordered>:
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
-                  IdxClass:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVLXSEG {
 let mayLoad = 1;
@@ -1225,7 +1233,7 @@
 
 class VPseudoUSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
       Pseudo<(outs),
-             (ins ValClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins ValClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSSEG {
 let mayLoad = 0;
@@ -1242,7 +1250,7 @@
 class VPseudoUSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
       Pseudo<(outs),
              (ins ValClass:$rd, GPR:$rs1,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSSEG {
 let mayLoad = 0;
@@ -1257,7 +1265,7 @@
 class VPseudoSSegStoreNoMask<VReg ValClass, bits<7> EEW,
                              bits<4> NF>:
       Pseudo<(outs),
-             (ins ValClass:$rd, GPR:$rs1, GPR: $offset, GPR:$vl, ixlenimm:$sew),[]>,
+             (ins ValClass:$rd, GPR:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSSEG {
 let mayLoad = 0;
@@ -1274,7 +1282,7 @@
 class VPseudoSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
       Pseudo<(outs),
              (ins ValClass:$rd, GPR:$rs1, GPR: $offset,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSSEG {
 let mayLoad = 0;
@@ -1291,7 +1299,7 @@
                              bits<4> NF, bit Ordered>:
       Pseudo<(outs),
              (ins ValClass:$rd, GPR:$rs1, IdxClass: $index,
-                  GPR:$vl, ixlenimm:$sew),[]>,
+                  AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSXSEG {
 let mayLoad = 0;
@@ -1309,7 +1317,7 @@
                              bits<4> NF, bit Ordered>:
       Pseudo<(outs),
              (ins ValClass:$rd, GPR:$rs1, IdxClass: $index,
-                  VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
       RISCVVSXSEG {
 let mayLoad = 0;
@@ -3794,7 +3802,7 @@
     let ForceTailAgnostic = true, Constraints = "$rd = $rs1" in
     def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd),
                                            (ins m.vrclass:$rs1, GPR:$rs2,
-                                                GPR:$vl, ixlenimm:$sew),
+                                                AVL:$vl, ixlenimm:$sew),
                                            []>, RISCVVPseudo;
   }
 }
@@ -3822,7 +3830,7 @@
       def "PseudoVFMV_S_" # f.FX # "_" # m.MX :
                                 Pseudo<(outs m.vrclass:$rd),
                                        (ins m.vrclass:$rs1, f.fprclass:$rs2,
-                                             GPR:$vl, ixlenimm:$sew),
+                                             AVL:$vl, ixlenimm:$sew),
                                        []>, RISCVVPseudo;
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -377,24 +377,24 @@
 ; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
 ; LMULMAX2-RV32-NEXT:    vand.vx v27, v27, a3
 ; LMULMAX2-RV32-NEXT:    vor.vv v26, v27, v26
-; LMULMAX2-RV32-NEXT:    addi a4, zero, 5
-; LMULMAX2-RV32-NEXT:    vsetivli a5, 1, e8,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a4
-; LMULMAX2-RV32-NEXT:    vsetivli a4, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v27, 0
-; LMULMAX2-RV32-NEXT:    lui a4, 1044480
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v27, v27, a4, v0
-; LMULMAX2-RV32-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v25, 8
-; LMULMAX2-RV32-NEXT:    vand.vv v27, v28, v27
-; LMULMAX2-RV32-NEXT:    vsrl.vi v28, v25, 24
+; LMULMAX2-RV32-NEXT:    vsrl.vi v27, v25, 24
 ; LMULMAX2-RV32-NEXT:    lui a4, 4080
-; LMULMAX2-RV32-NEXT:    vand.vx v28, v28, a4
-; LMULMAX2-RV32-NEXT:    vor.vv v27, v27, v28
-; LMULMAX2-RV32-NEXT:    vor.vv v26, v27, v26
-; LMULMAX2-RV32-NEXT:    addi a5, zero, 255
+; LMULMAX2-RV32-NEXT:    vand.vx v27, v27, a4
+; LMULMAX2-RV32-NEXT:    addi a5, zero, 5
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a5
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a5
+; LMULMAX2-RV32-NEXT:    vmv.v.i v28, 0
+; LMULMAX2-RV32-NEXT:    lui a1, 1044480
+; LMULMAX2-RV32-NEXT:    vmerge.vxm v28, v28, a1, v0
+; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vsrl.vi v29, v25, 8
+; LMULMAX2-RV32-NEXT:    vand.vv v28, v29, v28
+; LMULMAX2-RV32-NEXT:    vor.vv v27, v28, v27
+; LMULMAX2-RV32-NEXT:    vor.vv v26, v27, v26
+; LMULMAX2-RV32-NEXT:    addi a1, zero, 255
+; LMULMAX2-RV32-NEXT:    vsetivli a5, 4, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT:    vmv.v.x v27, a1
 ; LMULMAX2-RV32-NEXT:    vmerge.vim v27, v27, 0, v0
 ; LMULMAX2-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX2-RV32-NEXT:    vsll.vi v28, v25, 8
@@ -406,12 +406,12 @@
 ; LMULMAX2-RV32-NEXT:    vsll.vi v29, v25, 24
 ; LMULMAX2-RV32-NEXT:    vand.vv v28, v29, v28
 ;
LMULMAX2-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX2-RV32-NEXT: vsll.vx v28, v25, a2 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a4 -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v29, a4 +; LMULMAX2-RV32-NEXT: vmerge.vim v29, v29, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vx v29, v25, a2 -; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v29 ; LMULMAX2-RV32-NEXT: vsll.vx v25, v25, a6 ; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v28 ; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v27 @@ -577,24 +577,24 @@ ; LMULMAX1-RV32-NEXT: addi a3, a3, -256 ; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a3 ; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26 -; LMULMAX1-RV32-NEXT: addi a4, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a4 -; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v27, 0 -; LMULMAX1-RV32-NEXT: lui a4, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a4, v0 -; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 8 -; LMULMAX1-RV32-NEXT: vand.vv v27, v28, v27 -; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 24 +; LMULMAX1-RV32-NEXT: vsrl.vi v27, v25, 24 ; LMULMAX1-RV32-NEXT: lui a4, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v28, v28, a4 -; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v28 -; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26 -; LMULMAX1-RV32-NEXT: addi a5, zero, 255 +; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4 +; LMULMAX1-RV32-NEXT: addi a5, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a5 +; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0 +; LMULMAX1-RV32-NEXT: lui a1, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v28, v28, a1, v0 +; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vi v29, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26 +; LMULMAX1-RV32-NEXT: addi a1, zero, 255 +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsll.vi v28, v25, 8 @@ -606,12 +606,12 @@ ; LMULMAX1-RV32-NEXT: vsll.vi v29, v25, 24 ; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28 ; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a2 ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a4 -; LMULMAX1-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v29, a4 +; LMULMAX1-RV32-NEXT: vmerge.vim v29, v29, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vx v29, v25, a2 -; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 ; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 @@ -1241,24 +1241,24 @@ ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a3 ; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 -; LMULMAX2-RV32-NEXT: addi a4, zero, 85 -; LMULMAX2-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a4 -; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX2-RV32-NEXT: lui 
a4, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a4, v0 -; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 8 -; LMULMAX2-RV32-NEXT: vand.vv v30, v8, v30 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 24 +; LMULMAX2-RV32-NEXT: vsrl.vi v30, v26, 24 ; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX2-RV32-NEXT: vor.vv v30, v30, v8 -; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 -; LMULMAX2-RV32-NEXT: addi a5, zero, 255 +; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a4 +; LMULMAX2-RV32-NEXT: addi a5, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a5 +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vi v10, v26, 8 +; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30 +; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: addi a1, zero, 255 +; LMULMAX2-RV32-NEXT: vsetivli a5, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsll.vi v8, v26, 8 @@ -1270,12 +1270,12 @@ ; LMULMAX2-RV32-NEXT: vsll.vi v10, v26, 24 ; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30 +; LMULMAX2-RV32-NEXT: vsll.vx v8, v26, a2 ; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a4 -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v8, 0, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a4 +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vx v10, v26, a2 -; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsll.vx v26, v26, a6 ; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v8 ; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v30 @@ -1435,14 +1435,17 @@ ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v30, (a1) ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi a6, zero, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v30, a6 -; LMULMAX1-RV32-NEXT: addi a7, zero, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v27, v30, a7 +; LMULMAX1-RV32-NEXT: addi a7, zero, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v30, a7 +; LMULMAX1-RV32-NEXT: addi a3, zero, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v27, v30, a3 ; LMULMAX1-RV32-NEXT: lui a4, 16 ; LMULMAX1-RV32-NEXT: addi a4, a4, -256 ; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4 ; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v26 +; LMULMAX1-RV32-NEXT: vsrl.vi v26, v30, 24 +; LMULMAX1-RV32-NEXT: lui a6, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v28, v26, a6 ; LMULMAX1-RV32-NEXT: addi a5, zero, 5 ; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 @@ -1451,15 +1454,12 @@ ; LMULMAX1-RV32-NEXT: lui a2, 1044480 ; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v26, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vi v28, v30, 8 -; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v26 -; LMULMAX1-RV32-NEXT: vsrl.vi v29, v30, 24 -; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v29, v29, a5 -; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v29 +; LMULMAX1-RV32-NEXT: vsrl.vi v29, v30, 8 +; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v26 +; LMULMAX1-RV32-NEXT: vor.vv v28, v29, v28 ; 
LMULMAX1-RV32-NEXT: vor.vv v31, v28, v27 ; LMULMAX1-RV32-NEXT: addi a2, zero, 255 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2 ; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu @@ -1472,26 +1472,26 @@ ; LMULMAX1-RV32-NEXT: vsll.vi v8, v30, 24 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v28 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v29 +; LMULMAX1-RV32-NEXT: vsll.vx v9, v30, a3 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v29, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v29, a6 ; LMULMAX1-RV32-NEXT: vmerge.vim v29, v29, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vx v9, v30, a7 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v29 -; LMULMAX1-RV32-NEXT: vsll.vx v30, v30, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v30, v30, a7 ; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v9 ; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v8 ; LMULMAX1-RV32-NEXT: vor.vv v31, v30, v31 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v8, v31, v30 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV32-NEXT: lui a2, 986895 ; LMULMAX1-RV32-NEXT: addi a2, a2, 240 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v9 @@ -1499,14 +1499,14 @@ ; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v8 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v10, v31, v8 ; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 2 ; LMULMAX1-RV32-NEXT: lui a2, 838861 ; LMULMAX1-RV32-NEXT: addi a2, a2, -820 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v11 @@ -1514,27 +1514,27 @@ ; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v10 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v12, v31, v10 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 1 ; LMULMAX1-RV32-NEXT: lui a2, 699051 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1366 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v13 ; LMULMAX1-RV32-NEXT: vsrl.vi v31, v31, 1 ; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v12 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v25, a6 -; LMULMAX1-RV32-NEXT: vsrl.vx v14, v25, a7 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v25, a7 +; LMULMAX1-RV32-NEXT: vsrl.vx v14, v25, a3 ; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a4 ; LMULMAX1-RV32-NEXT: vor.vv v12, v14, v12 -; LMULMAX1-RV32-NEXT: 
vsrl.vi v14, v25, 8 -; LMULMAX1-RV32-NEXT: vand.vv v26, v14, v26 ; LMULMAX1-RV32-NEXT: vsrl.vi v14, v25, 24 -; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a5 +; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a6 +; LMULMAX1-RV32-NEXT: vsrl.vi v15, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v26, v15, v26 ; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v14 ; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v12 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 8 @@ -1542,9 +1542,9 @@ ; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 24 ; LMULMAX1-RV32-NEXT: vand.vv v28, v12, v28 ; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27 -; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a7 +; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a3 ; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 -; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a7 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -614,64 +614,64 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX2-RV64-NEXT: vle64.v v25, (a0) -; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 1 +; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV64-NEXT: srli a2, a1, 40 ; LMULMAX2-RV64-NEXT: lui a3, 16 ; LMULMAX2-RV64-NEXT: addiw a7, a3, -256 ; LMULMAX2-RV64-NEXT: and a2, a2, a7 ; LMULMAX2-RV64-NEXT: srli a4, a1, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 +; LMULMAX2-RV64-NEXT: or t0, a2, a4 ; LMULMAX2-RV64-NEXT: srli a4, a1, 24 ; LMULMAX2-RV64-NEXT: lui a6, 4080 ; LMULMAX2-RV64-NEXT: and a4, a4, a6 ; LMULMAX2-RV64-NEXT: srli a5, a1, 8 ; LMULMAX2-RV64-NEXT: addi a3, zero, 255 -; LMULMAX2-RV64-NEXT: slli t0, a3, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 +; LMULMAX2-RV64-NEXT: slli a2, a3, 24 +; LMULMAX2-RV64-NEXT: and a5, a5, a2 ; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or t1, a4, a2 -; LMULMAX2-RV64-NEXT: slli a4, a1, 8 -; LMULMAX2-RV64-NEXT: slli t2, a3, 32 +; LMULMAX2-RV64-NEXT: or t0, a4, t0 +; LMULMAX2-RV64-NEXT: slli a5, a1, 8 +; LMULMAX2-RV64-NEXT: slli t1, a3, 32 +; LMULMAX2-RV64-NEXT: and a5, a5, t1 +; LMULMAX2-RV64-NEXT: slli a4, a1, 24 +; LMULMAX2-RV64-NEXT: slli t2, a3, 40 ; LMULMAX2-RV64-NEXT: and a4, a4, t2 -; LMULMAX2-RV64-NEXT: slli a2, a1, 24 -; LMULMAX2-RV64-NEXT: slli t3, a3, 40 -; LMULMAX2-RV64-NEXT: and a2, a2, t3 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 -; LMULMAX2-RV64-NEXT: slli a4, a1, 40 +; LMULMAX2-RV64-NEXT: or a4, a4, a5 +; LMULMAX2-RV64-NEXT: slli a5, a1, 40 ; LMULMAX2-RV64-NEXT: slli a3, a3, 48 -; LMULMAX2-RV64-NEXT: and a4, a4, a3 +; LMULMAX2-RV64-NEXT: and a5, a5, a3 ; LMULMAX2-RV64-NEXT: slli a1, a1, 56 +; LMULMAX2-RV64-NEXT: or a1, a1, a5 ; LMULMAX2-RV64-NEXT: or a1, a1, a4 -; LMULMAX2-RV64-NEXT: or a1, a1, a2 -; LMULMAX2-RV64-NEXT: or t1, a1, t1 -; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu -; LMULMAX2-RV64-NEXT: vslidedown.vi v25, v25, 1 -; LMULMAX2-RV64-NEXT: vmv.x.s a2, v25 -; LMULMAX2-RV64-NEXT: srli a4, a2, 40 -; LMULMAX2-RV64-NEXT: and a4, a4, a7 -; LMULMAX2-RV64-NEXT: srli a1, a2, 56 -; LMULMAX2-RV64-NEXT: or a1, a4, a1 -; LMULMAX2-RV64-NEXT: srli a4, a2, 24 +; LMULMAX2-RV64-NEXT: or a1, a1, t0 +; LMULMAX2-RV64-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25 +; 
LMULMAX2-RV64-NEXT: srli a4, a1, 24 ; LMULMAX2-RV64-NEXT: and a4, a4, a6 -; LMULMAX2-RV64-NEXT: srli a5, a2, 8 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or a1, a4, a1 -; LMULMAX2-RV64-NEXT: slli a4, a2, 8 -; LMULMAX2-RV64-NEXT: and a4, a4, t2 -; LMULMAX2-RV64-NEXT: slli a5, a2, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t3 +; LMULMAX2-RV64-NEXT: srli a5, a1, 8 +; LMULMAX2-RV64-NEXT: and a2, a5, a2 +; LMULMAX2-RV64-NEXT: or a2, a2, a4 +; LMULMAX2-RV64-NEXT: srli a4, a1, 40 +; LMULMAX2-RV64-NEXT: and a4, a4, a7 +; LMULMAX2-RV64-NEXT: srli a5, a1, 56 +; LMULMAX2-RV64-NEXT: or a4, a4, a5 +; LMULMAX2-RV64-NEXT: or a2, a2, a4 +; LMULMAX2-RV64-NEXT: slli a4, a1, 8 +; LMULMAX2-RV64-NEXT: and a4, a4, t1 +; LMULMAX2-RV64-NEXT: slli a5, a1, 24 +; LMULMAX2-RV64-NEXT: and a5, a5, t2 ; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: slli a5, a2, 40 +; LMULMAX2-RV64-NEXT: slli a5, a1, 40 ; LMULMAX2-RV64-NEXT: and a3, a5, a3 -; LMULMAX2-RV64-NEXT: slli a2, a2, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a3 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 -; LMULMAX2-RV64-NEXT: or a1, a2, a1 -; LMULMAX2-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.x v25, a1 -; LMULMAX2-RV64-NEXT: vmv.s.x v25, t1 -; LMULMAX2-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX2-RV64-NEXT: slli a1, a1, 56 +; LMULMAX2-RV64-NEXT: or a1, a1, a3 +; LMULMAX2-RV64-NEXT: or a1, a1, a4 +; LMULMAX2-RV64-NEXT: or a1, a1, a2 +; LMULMAX2-RV64-NEXT: vmv.s.x v26, a1 +; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-RV32-LABEL: bswap_v2i64: @@ -743,64 +743,64 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 1 +; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV64-NEXT: srli a2, a1, 40 ; LMULMAX1-RV64-NEXT: lui a3, 16 ; LMULMAX1-RV64-NEXT: addiw a7, a3, -256 ; LMULMAX1-RV64-NEXT: and a2, a2, a7 ; LMULMAX1-RV64-NEXT: srli a4, a1, 56 -; LMULMAX1-RV64-NEXT: or a2, a2, a4 +; LMULMAX1-RV64-NEXT: or t0, a2, a4 ; LMULMAX1-RV64-NEXT: srli a4, a1, 24 ; LMULMAX1-RV64-NEXT: lui a6, 4080 ; LMULMAX1-RV64-NEXT: and a4, a4, a6 ; LMULMAX1-RV64-NEXT: srli a5, a1, 8 ; LMULMAX1-RV64-NEXT: addi a3, zero, 255 -; LMULMAX1-RV64-NEXT: slli t0, a3, 24 -; LMULMAX1-RV64-NEXT: and a5, a5, t0 +; LMULMAX1-RV64-NEXT: slli a2, a3, 24 +; LMULMAX1-RV64-NEXT: and a5, a5, a2 ; LMULMAX1-RV64-NEXT: or a4, a5, a4 -; LMULMAX1-RV64-NEXT: or t1, a4, a2 -; LMULMAX1-RV64-NEXT: slli a4, a1, 8 -; LMULMAX1-RV64-NEXT: slli t2, a3, 32 +; LMULMAX1-RV64-NEXT: or t0, a4, t0 +; LMULMAX1-RV64-NEXT: slli a5, a1, 8 +; LMULMAX1-RV64-NEXT: slli t1, a3, 32 +; LMULMAX1-RV64-NEXT: and a5, a5, t1 +; LMULMAX1-RV64-NEXT: slli a4, a1, 24 +; LMULMAX1-RV64-NEXT: slli t2, a3, 40 ; LMULMAX1-RV64-NEXT: and a4, a4, t2 -; LMULMAX1-RV64-NEXT: slli a2, a1, 24 -; LMULMAX1-RV64-NEXT: slli t3, a3, 40 -; LMULMAX1-RV64-NEXT: and a2, a2, t3 -; LMULMAX1-RV64-NEXT: or a2, a2, a4 -; LMULMAX1-RV64-NEXT: slli a4, a1, 40 +; LMULMAX1-RV64-NEXT: or a4, a4, a5 +; LMULMAX1-RV64-NEXT: slli a5, a1, 40 ; LMULMAX1-RV64-NEXT: slli a3, a3, 48 -; LMULMAX1-RV64-NEXT: and a4, a4, a3 +; LMULMAX1-RV64-NEXT: and a5, a5, a3 ; LMULMAX1-RV64-NEXT: slli a1, a1, 56 +; LMULMAX1-RV64-NEXT: or a1, a1, a5 ; LMULMAX1-RV64-NEXT: or a1, a1, a4 -; LMULMAX1-RV64-NEXT: or a1, a1, a2 -; LMULMAX1-RV64-NEXT: or t1, a1, t1 -; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu 
-; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1 -; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25 -; LMULMAX1-RV64-NEXT: srli a4, a2, 40 -; LMULMAX1-RV64-NEXT: and a4, a4, a7 -; LMULMAX1-RV64-NEXT: srli a1, a2, 56 -; LMULMAX1-RV64-NEXT: or a1, a4, a1 -; LMULMAX1-RV64-NEXT: srli a4, a2, 24 +; LMULMAX1-RV64-NEXT: or a1, a1, t0 +; LMULMAX1-RV64-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV64-NEXT: srli a4, a1, 24 ; LMULMAX1-RV64-NEXT: and a4, a4, a6 -; LMULMAX1-RV64-NEXT: srli a5, a2, 8 -; LMULMAX1-RV64-NEXT: and a5, a5, t0 -; LMULMAX1-RV64-NEXT: or a4, a5, a4 -; LMULMAX1-RV64-NEXT: or a1, a4, a1 -; LMULMAX1-RV64-NEXT: slli a4, a2, 8 -; LMULMAX1-RV64-NEXT: and a4, a4, t2 -; LMULMAX1-RV64-NEXT: slli a5, a2, 24 -; LMULMAX1-RV64-NEXT: and a5, a5, t3 +; LMULMAX1-RV64-NEXT: srli a5, a1, 8 +; LMULMAX1-RV64-NEXT: and a2, a5, a2 +; LMULMAX1-RV64-NEXT: or a2, a2, a4 +; LMULMAX1-RV64-NEXT: srli a4, a1, 40 +; LMULMAX1-RV64-NEXT: and a4, a4, a7 +; LMULMAX1-RV64-NEXT: srli a5, a1, 56 +; LMULMAX1-RV64-NEXT: or a4, a4, a5 +; LMULMAX1-RV64-NEXT: or a2, a2, a4 +; LMULMAX1-RV64-NEXT: slli a4, a1, 8 +; LMULMAX1-RV64-NEXT: and a4, a4, t1 +; LMULMAX1-RV64-NEXT: slli a5, a1, 24 +; LMULMAX1-RV64-NEXT: and a5, a5, t2 ; LMULMAX1-RV64-NEXT: or a4, a5, a4 -; LMULMAX1-RV64-NEXT: slli a5, a2, 40 +; LMULMAX1-RV64-NEXT: slli a5, a1, 40 ; LMULMAX1-RV64-NEXT: and a3, a5, a3 -; LMULMAX1-RV64-NEXT: slli a2, a2, 56 -; LMULMAX1-RV64-NEXT: or a2, a2, a3 -; LMULMAX1-RV64-NEXT: or a2, a2, a4 -; LMULMAX1-RV64-NEXT: or a1, a2, a1 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1 -; LMULMAX1-RV64-NEXT: vmv.s.x v25, t1 -; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: slli a1, a1, 56 +; LMULMAX1-RV64-NEXT: or a1, a1, a3 +; LMULMAX1-RV64-NEXT: or a1, a1, a4 +; LMULMAX1-RV64-NEXT: or a1, a1, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1 +; LMULMAX1-RV64-NEXT: vse64.v v26, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y @@ -1959,108 +1959,108 @@ ; LMULMAX2-RV64-NEXT: andi sp, sp, -32 ; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26 -; LMULMAX2-RV64-NEXT: srli a2, a1, 40 -; LMULMAX2-RV64-NEXT: lui a3, 16 -; LMULMAX2-RV64-NEXT: addiw a7, a3, -256 -; LMULMAX2-RV64-NEXT: and a2, a2, a7 -; LMULMAX2-RV64-NEXT: srli a3, a1, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a3 -; LMULMAX2-RV64-NEXT: srli a3, a1, 24 +; LMULMAX2-RV64-NEXT: vmv.x.s a2, v26 +; LMULMAX2-RV64-NEXT: srli a1, a2, 24 ; LMULMAX2-RV64-NEXT: lui a6, 4080 -; LMULMAX2-RV64-NEXT: and a4, a3, a6 -; LMULMAX2-RV64-NEXT: srli a5, a1, 8 -; LMULMAX2-RV64-NEXT: addi a3, zero, 255 -; LMULMAX2-RV64-NEXT: slli t0, a3, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or a4, a4, a2 -; LMULMAX2-RV64-NEXT: slli a2, a1, 8 -; LMULMAX2-RV64-NEXT: slli t1, a3, 32 -; LMULMAX2-RV64-NEXT: and a2, a2, t1 -; LMULMAX2-RV64-NEXT: slli a5, a1, 24 -; LMULMAX2-RV64-NEXT: slli t2, a3, 40 -; LMULMAX2-RV64-NEXT: and a5, a5, t2 -; LMULMAX2-RV64-NEXT: or a5, a5, a2 -; LMULMAX2-RV64-NEXT: slli a2, a1, 40 -; LMULMAX2-RV64-NEXT: slli a3, a3, 48 -; LMULMAX2-RV64-NEXT: and a2, a2, a3 -; LMULMAX2-RV64-NEXT: slli a1, a1, 56 -; LMULMAX2-RV64-NEXT: or a1, a1, a2 -; LMULMAX2-RV64-NEXT: or a1, a1, a5 -; LMULMAX2-RV64-NEXT: or a1, a1, a4 +; LMULMAX2-RV64-NEXT: and a1, a1, a6 +; LMULMAX2-RV64-NEXT: srli a3, a2, 8 +; 
LMULMAX2-RV64-NEXT: addi a5, zero, 255 +; LMULMAX2-RV64-NEXT: slli a7, a5, 24 +; LMULMAX2-RV64-NEXT: and a3, a3, a7 +; LMULMAX2-RV64-NEXT: or a3, a3, a1 +; LMULMAX2-RV64-NEXT: srli a4, a2, 40 +; LMULMAX2-RV64-NEXT: lui a1, 16 +; LMULMAX2-RV64-NEXT: addiw t0, a1, -256 +; LMULMAX2-RV64-NEXT: and a4, a4, t0 +; LMULMAX2-RV64-NEXT: srli a1, a2, 56 +; LMULMAX2-RV64-NEXT: or a1, a4, a1 +; LMULMAX2-RV64-NEXT: or a1, a3, a1 +; LMULMAX2-RV64-NEXT: slli a4, a2, 8 +; LMULMAX2-RV64-NEXT: slli t1, a5, 32 +; LMULMAX2-RV64-NEXT: and a3, a4, t1 +; LMULMAX2-RV64-NEXT: slli a4, a2, 24 +; LMULMAX2-RV64-NEXT: slli t2, a5, 40 +; LMULMAX2-RV64-NEXT: and a4, a4, t2 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: slli a4, a2, 40 +; LMULMAX2-RV64-NEXT: slli a5, a5, 48 +; LMULMAX2-RV64-NEXT: and a4, a4, a5 +; LMULMAX2-RV64-NEXT: slli a2, a2, 56 +; LMULMAX2-RV64-NEXT: or a2, a2, a4 +; LMULMAX2-RV64-NEXT: or a2, a2, a3 +; LMULMAX2-RV64-NEXT: or a1, a2, a1 ; LMULMAX2-RV64-NEXT: sd a1, 0(sp) ; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV64-NEXT: srli a2, a1, 40 -; LMULMAX2-RV64-NEXT: and a2, a2, a7 -; LMULMAX2-RV64-NEXT: srli a4, a1, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 -; LMULMAX2-RV64-NEXT: srli a4, a1, 24 -; LMULMAX2-RV64-NEXT: and a4, a4, a6 -; LMULMAX2-RV64-NEXT: srli a5, a1, 8 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or a2, a4, a2 -; LMULMAX2-RV64-NEXT: slli a4, a1, 8 -; LMULMAX2-RV64-NEXT: and a4, a4, t1 -; LMULMAX2-RV64-NEXT: slli a5, a1, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t2 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: slli a5, a1, 40 -; LMULMAX2-RV64-NEXT: and a5, a5, a3 +; LMULMAX2-RV64-NEXT: and a2, a2, t0 +; LMULMAX2-RV64-NEXT: srli a3, a1, 56 +; LMULMAX2-RV64-NEXT: or a2, a2, a3 +; LMULMAX2-RV64-NEXT: srli a3, a1, 24 +; LMULMAX2-RV64-NEXT: and a3, a3, a6 +; LMULMAX2-RV64-NEXT: srli a4, a1, 8 +; LMULMAX2-RV64-NEXT: and a4, a4, a7 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: or a2, a3, a2 +; LMULMAX2-RV64-NEXT: slli a3, a1, 8 +; LMULMAX2-RV64-NEXT: and a3, a3, t1 +; LMULMAX2-RV64-NEXT: slli a4, a1, 24 +; LMULMAX2-RV64-NEXT: and a4, a4, t2 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: slli a4, a1, 40 +; LMULMAX2-RV64-NEXT: and a4, a4, a5 ; LMULMAX2-RV64-NEXT: slli a1, a1, 56 -; LMULMAX2-RV64-NEXT: or a1, a1, a5 ; LMULMAX2-RV64-NEXT: or a1, a1, a4 +; LMULMAX2-RV64-NEXT: or a1, a1, a3 ; LMULMAX2-RV64-NEXT: or a1, a1, a2 ; LMULMAX2-RV64-NEXT: sd a1, 24(sp) ; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV64-NEXT: srli a2, a1, 40 -; LMULMAX2-RV64-NEXT: and a2, a2, a7 -; LMULMAX2-RV64-NEXT: srli a4, a1, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 -; LMULMAX2-RV64-NEXT: srli a4, a1, 24 -; LMULMAX2-RV64-NEXT: and a4, a4, a6 -; LMULMAX2-RV64-NEXT: srli a5, a1, 8 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or a2, a4, a2 -; LMULMAX2-RV64-NEXT: slli a4, a1, 8 -; LMULMAX2-RV64-NEXT: and a4, a4, t1 -; LMULMAX2-RV64-NEXT: slli a5, a1, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t2 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: slli a5, a1, 40 -; LMULMAX2-RV64-NEXT: and a5, a5, a3 +; LMULMAX2-RV64-NEXT: and a2, a2, t0 +; LMULMAX2-RV64-NEXT: srli a3, a1, 56 +; LMULMAX2-RV64-NEXT: or a2, a2, a3 +; LMULMAX2-RV64-NEXT: srli a3, a1, 24 +; LMULMAX2-RV64-NEXT: and a3, a3, a6 +; LMULMAX2-RV64-NEXT: srli a4, 
a1, 8 +; LMULMAX2-RV64-NEXT: and a4, a4, a7 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: or a2, a3, a2 +; LMULMAX2-RV64-NEXT: slli a3, a1, 8 +; LMULMAX2-RV64-NEXT: and a3, a3, t1 +; LMULMAX2-RV64-NEXT: slli a4, a1, 24 +; LMULMAX2-RV64-NEXT: and a4, a4, t2 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: slli a4, a1, 40 +; LMULMAX2-RV64-NEXT: and a4, a4, a5 ; LMULMAX2-RV64-NEXT: slli a1, a1, 56 -; LMULMAX2-RV64-NEXT: or a1, a1, a5 ; LMULMAX2-RV64-NEXT: or a1, a1, a4 +; LMULMAX2-RV64-NEXT: or a1, a1, a3 ; LMULMAX2-RV64-NEXT: or a1, a1, a2 ; LMULMAX2-RV64-NEXT: sd a1, 16(sp) ; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV64-NEXT: srli a2, a1, 40 -; LMULMAX2-RV64-NEXT: and a2, a2, a7 -; LMULMAX2-RV64-NEXT: srli a4, a1, 56 -; LMULMAX2-RV64-NEXT: or a2, a2, a4 -; LMULMAX2-RV64-NEXT: srli a4, a1, 24 -; LMULMAX2-RV64-NEXT: and a4, a4, a6 -; LMULMAX2-RV64-NEXT: srli a5, a1, 8 -; LMULMAX2-RV64-NEXT: and a5, a5, t0 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: or a2, a4, a2 -; LMULMAX2-RV64-NEXT: slli a4, a1, 8 -; LMULMAX2-RV64-NEXT: and a4, a4, t1 -; LMULMAX2-RV64-NEXT: slli a5, a1, 24 -; LMULMAX2-RV64-NEXT: and a5, a5, t2 -; LMULMAX2-RV64-NEXT: or a4, a5, a4 -; LMULMAX2-RV64-NEXT: slli a5, a1, 40 -; LMULMAX2-RV64-NEXT: and a3, a5, a3 +; LMULMAX2-RV64-NEXT: and a2, a2, t0 +; LMULMAX2-RV64-NEXT: srli a3, a1, 56 +; LMULMAX2-RV64-NEXT: or a2, a2, a3 +; LMULMAX2-RV64-NEXT: srli a3, a1, 24 +; LMULMAX2-RV64-NEXT: and a3, a3, a6 +; LMULMAX2-RV64-NEXT: srli a4, a1, 8 +; LMULMAX2-RV64-NEXT: and a4, a4, a7 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: or a2, a3, a2 +; LMULMAX2-RV64-NEXT: slli a3, a1, 8 +; LMULMAX2-RV64-NEXT: and a3, a3, t1 +; LMULMAX2-RV64-NEXT: slli a4, a1, 24 +; LMULMAX2-RV64-NEXT: and a4, a4, t2 +; LMULMAX2-RV64-NEXT: or a3, a4, a3 +; LMULMAX2-RV64-NEXT: slli a4, a1, 40 +; LMULMAX2-RV64-NEXT: and a4, a4, a5 ; LMULMAX2-RV64-NEXT: slli a1, a1, 56 -; LMULMAX2-RV64-NEXT: or a1, a1, a3 ; LMULMAX2-RV64-NEXT: or a1, a1, a4 +; LMULMAX2-RV64-NEXT: or a1, a1, a3 ; LMULMAX2-RV64-NEXT: or a1, a1, a2 ; LMULMAX2-RV64-NEXT: sd a1, 8(sp) ; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu @@ -2193,8 +2193,10 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: addi a6, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v26, (a6) +; LMULMAX1-RV64-NEXT: vle64.v v27, (a6) ; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v27, 1 ; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV64-NEXT: srli a1, a2, 40 ; LMULMAX1-RV64-NEXT: lui a3, 16 @@ -2210,49 +2212,49 @@ ; LMULMAX1-RV64-NEXT: slli t1, a4, 24 ; LMULMAX1-RV64-NEXT: and a5, a5, t1 ; LMULMAX1-RV64-NEXT: or a3, a5, a3 -; LMULMAX1-RV64-NEXT: or a5, a3, a1 +; LMULMAX1-RV64-NEXT: or a3, a3, a1 ; LMULMAX1-RV64-NEXT: slli a1, a2, 8 ; LMULMAX1-RV64-NEXT: slli t2, a4, 32 -; LMULMAX1-RV64-NEXT: and a3, a1, t2 -; LMULMAX1-RV64-NEXT: slli a1, a2, 24 +; LMULMAX1-RV64-NEXT: and a1, a1, t2 +; LMULMAX1-RV64-NEXT: slli a5, a2, 24 ; LMULMAX1-RV64-NEXT: slli t3, a4, 40 -; LMULMAX1-RV64-NEXT: and a1, a1, t3 -; LMULMAX1-RV64-NEXT: or a1, a1, a3 -; LMULMAX1-RV64-NEXT: slli a3, a2, 40 +; LMULMAX1-RV64-NEXT: and a5, a5, t3 +; LMULMAX1-RV64-NEXT: or a5, a5, a1 +; LMULMAX1-RV64-NEXT: slli a1, a2, 40 ; LMULMAX1-RV64-NEXT: slli a4, a4, 48 -; LMULMAX1-RV64-NEXT: and a3, a3, a4 +; LMULMAX1-RV64-NEXT: and a1, a1, a4 ; LMULMAX1-RV64-NEXT: slli a2, a2, 56 -; 
LMULMAX1-RV64-NEXT: or a2, a2, a3 ; LMULMAX1-RV64-NEXT: or a1, a2, a1 -; LMULMAX1-RV64-NEXT: or t4, a1, a5 -; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v26, 1 -; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26 -; LMULMAX1-RV64-NEXT: srli a3, a2, 40 -; LMULMAX1-RV64-NEXT: and a3, a3, t0 -; LMULMAX1-RV64-NEXT: srli a5, a2, 56 -; LMULMAX1-RV64-NEXT: or a3, a3, a5 -; LMULMAX1-RV64-NEXT: srli a5, a2, 24 -; LMULMAX1-RV64-NEXT: and a5, a5, a7 -; LMULMAX1-RV64-NEXT: srli a1, a2, 8 -; LMULMAX1-RV64-NEXT: and a1, a1, t1 ; LMULMAX1-RV64-NEXT: or a1, a1, a5 ; LMULMAX1-RV64-NEXT: or a1, a1, a3 -; LMULMAX1-RV64-NEXT: slli a3, a2, 8 +; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27 +; LMULMAX1-RV64-NEXT: srli a2, a1, 24 +; LMULMAX1-RV64-NEXT: and a2, a2, a7 +; LMULMAX1-RV64-NEXT: srli a3, a1, 8 +; LMULMAX1-RV64-NEXT: and a3, a3, t1 +; LMULMAX1-RV64-NEXT: or a2, a3, a2 +; LMULMAX1-RV64-NEXT: srli a3, a1, 40 +; LMULMAX1-RV64-NEXT: and a3, a3, t0 +; LMULMAX1-RV64-NEXT: srli a5, a1, 56 +; LMULMAX1-RV64-NEXT: or a3, a3, a5 +; LMULMAX1-RV64-NEXT: or a2, a2, a3 +; LMULMAX1-RV64-NEXT: slli a3, a1, 8 ; LMULMAX1-RV64-NEXT: and a3, a3, t2 -; LMULMAX1-RV64-NEXT: slli a5, a2, 24 +; LMULMAX1-RV64-NEXT: slli a5, a1, 24 ; LMULMAX1-RV64-NEXT: and a5, a5, t3 ; LMULMAX1-RV64-NEXT: or a3, a5, a3 -; LMULMAX1-RV64-NEXT: slli a5, a2, 40 +; LMULMAX1-RV64-NEXT: slli a5, a1, 40 ; LMULMAX1-RV64-NEXT: and a5, a5, a4 -; LMULMAX1-RV64-NEXT: slli a2, a2, 56 -; LMULMAX1-RV64-NEXT: or a2, a2, a5 -; LMULMAX1-RV64-NEXT: or a2, a2, a3 -; LMULMAX1-RV64-NEXT: or a1, a2, a1 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1 -; LMULMAX1-RV64-NEXT: vmv.s.x v26, t4 -; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV64-NEXT: slli a1, a1, 56 +; LMULMAX1-RV64-NEXT: or a1, a1, a5 +; LMULMAX1-RV64-NEXT: or a1, a1, a3 +; LMULMAX1-RV64-NEXT: or a1, a1, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1 +; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v25, 1 +; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27 ; LMULMAX1-RV64-NEXT: srli a2, a1, 40 ; LMULMAX1-RV64-NEXT: and a2, a2, t0 ; LMULMAX1-RV64-NEXT: srli a3, a1, 56 @@ -2273,35 +2275,33 @@ ; LMULMAX1-RV64-NEXT: slli a1, a1, 56 ; LMULMAX1-RV64-NEXT: or a1, a1, a5 ; LMULMAX1-RV64-NEXT: or a1, a1, a3 -; LMULMAX1-RV64-NEXT: or t4, a1, a2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1 -; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25 -; LMULMAX1-RV64-NEXT: srli a3, a2, 40 +; LMULMAX1-RV64-NEXT: or a1, a1, a2 +; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.v.x v27, a1 +; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV64-NEXT: srli a2, a1, 24 +; LMULMAX1-RV64-NEXT: and a2, a2, a7 +; LMULMAX1-RV64-NEXT: srli a3, a1, 8 +; LMULMAX1-RV64-NEXT: and a3, a3, t1 +; LMULMAX1-RV64-NEXT: or a2, a3, a2 +; LMULMAX1-RV64-NEXT: srli a3, a1, 40 ; LMULMAX1-RV64-NEXT: and a3, a3, t0 -; LMULMAX1-RV64-NEXT: srli a5, a2, 56 +; LMULMAX1-RV64-NEXT: srli a5, a1, 56 ; LMULMAX1-RV64-NEXT: or a3, a3, a5 -; LMULMAX1-RV64-NEXT: srli a5, a2, 24 -; LMULMAX1-RV64-NEXT: and a5, a5, a7 -; LMULMAX1-RV64-NEXT: srli a1, a2, 8 -; LMULMAX1-RV64-NEXT: and a1, a1, t1 -; LMULMAX1-RV64-NEXT: or a1, a1, a5 -; LMULMAX1-RV64-NEXT: or a1, a1, a3 -; LMULMAX1-RV64-NEXT: slli a3, a2, 8 +; LMULMAX1-RV64-NEXT: or a2, a2, a3 +; LMULMAX1-RV64-NEXT: slli a3, a1, 8 ; LMULMAX1-RV64-NEXT: and a3, a3, t2 -; 
LMULMAX1-RV64-NEXT: slli a5, a2, 24 +; LMULMAX1-RV64-NEXT: slli a5, a1, 24 ; LMULMAX1-RV64-NEXT: and a5, a5, t3 ; LMULMAX1-RV64-NEXT: or a3, a5, a3 -; LMULMAX1-RV64-NEXT: slli a5, a2, 40 +; LMULMAX1-RV64-NEXT: slli a5, a1, 40 ; LMULMAX1-RV64-NEXT: and a4, a5, a4 -; LMULMAX1-RV64-NEXT: slli a2, a2, 56 -; LMULMAX1-RV64-NEXT: or a2, a2, a4 -; LMULMAX1-RV64-NEXT: or a2, a2, a3 -; LMULMAX1-RV64-NEXT: or a1, a2, a1 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1 -; LMULMAX1-RV64-NEXT: vmv.s.x v25, t4 -; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: slli a1, a1, 56 +; LMULMAX1-RV64-NEXT: or a1, a1, a4 +; LMULMAX1-RV64-NEXT: or a1, a1, a3 +; LMULMAX1-RV64-NEXT: or a1, a1, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1 +; LMULMAX1-RV64-NEXT: vse64.v v27, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v26, (a6) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -900,27 +900,27 @@ ; LMULMAX1-NEXT: vle32.v v31, (a1) ; LMULMAX1-NEXT: addi a0, a0, 112 ; LMULMAX1-NEXT: vle32.v v24, (a0) -; LMULMAX1-NEXT: ld a1, 0(s0) -; LMULMAX1-NEXT: addi a0, sp, 240 -; LMULMAX1-NEXT: vse32.v v15, (a0) -; LMULMAX1-NEXT: addi a0, sp, 224 -; LMULMAX1-NEXT: vse32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, sp, 208 -; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: addi a0, sp, 192 -; LMULMAX1-NEXT: vse32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, sp, 176 -; LMULMAX1-NEXT: vse32.v v11, (a0) -; LMULMAX1-NEXT: addi a0, sp, 160 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, sp, 144 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: addi a0, sp, 128 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, zero, 42 -; LMULMAX1-NEXT: sd a0, 8(sp) +; LMULMAX1-NEXT: ld a0, 0(s0) +; LMULMAX1-NEXT: addi a1, sp, 240 +; LMULMAX1-NEXT: vse32.v v15, (a1) +; LMULMAX1-NEXT: addi a1, sp, 224 +; LMULMAX1-NEXT: vse32.v v14, (a1) +; LMULMAX1-NEXT: addi a1, sp, 208 +; LMULMAX1-NEXT: vse32.v v13, (a1) +; LMULMAX1-NEXT: addi a1, sp, 192 +; LMULMAX1-NEXT: vse32.v v12, (a1) +; LMULMAX1-NEXT: addi a1, sp, 176 +; LMULMAX1-NEXT: vse32.v v11, (a1) +; LMULMAX1-NEXT: addi a1, sp, 160 +; LMULMAX1-NEXT: vse32.v v10, (a1) +; LMULMAX1-NEXT: addi a1, sp, 144 +; LMULMAX1-NEXT: vse32.v v9, (a1) +; LMULMAX1-NEXT: addi a1, zero, 42 +; LMULMAX1-NEXT: sd a1, 8(sp) +; LMULMAX1-NEXT: sd a0, 0(sp) ; LMULMAX1-NEXT: addi a0, sp, 128 -; LMULMAX1-NEXT: sd a1, 0(sp) +; LMULMAX1-NEXT: addi a1, sp, 128 +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: vmv1r.v v8, v25 ; LMULMAX1-NEXT: vmv1r.v v9, v26 ; LMULMAX1-NEXT: vmv1r.v v10, v27 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3657,23 +3657,23 @@ ; LMULMAX2-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 +; LMULMAX2-RV32-NEXT: addi a6, zero, 32 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a3, a1, 
819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi a6, zero, 32 -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX2-RV32-NEXT: lui a1, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3698,8 +3698,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_3 ; LMULMAX2-RV32-NEXT: .LBB3_2: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3890,23 +3890,23 @@ ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 +; LMULMAX1-RV32-NEXT: addi a6, zero, 32 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a3, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a6, zero, 32 -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX1-RV32-NEXT: lui a1, 4112 +; LMULMAX1-RV32-NEXT: addi a2, a1, 257 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: srli a1, a5, 1 -; LMULMAX1-RV32-NEXT: or a1, a5, a1 +; LMULMAX1-RV32-NEXT: srli a5, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -3931,8 +3931,8 @@ ; LMULMAX1-RV32-NEXT: addi a5, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_3 ; LMULMAX1-RV32-NEXT: .LBB3_2: -; LMULMAX1-RV32-NEXT: srli a5, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a5 +; LMULMAX1-RV32-NEXT: srli a1, a5, 1 +; LMULMAX1-RV32-NEXT: or a1, a5, a1 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -11110,23 +11110,23 @@ ; LMULMAX2-RV32-NEXT: sw zero, 20(sp) ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 +; LMULMAX2-RV32-NEXT: addi a6, zero, 32 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi 
a6, zero, 32
-; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
-; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
-; LMULMAX2-RV32-NEXT: addi a2, a2, 257
-; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2
+; LMULMAX2-RV32-NEXT: lui a1, 4112
+; LMULMAX2-RV32-NEXT: addi a2, a1, 257
+; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
 ; LMULMAX2-RV32-NEXT: # %bb.1:
-; LMULMAX2-RV32-NEXT: srli a1, a5, 1
-; LMULMAX2-RV32-NEXT: or a1, a5, a1
+; LMULMAX2-RV32-NEXT: srli a5, a1, 1
+; LMULMAX2-RV32-NEXT: or a1, a1, a5
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT: or a1, a1, a5
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@@ -11151,8 +11151,8 @@
 ; LMULMAX2-RV32-NEXT: addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT: j .LBB7_3
 ; LMULMAX2-RV32-NEXT: .LBB7_2:
-; LMULMAX2-RV32-NEXT: srli a5, a1, 1
-; LMULMAX2-RV32-NEXT: or a1, a1, a5
+; LMULMAX2-RV32-NEXT: srli a1, a5, 1
+; LMULMAX2-RV32-NEXT: or a1, a5, a1
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2
 ; LMULMAX2-RV32-NEXT: or a1, a1, a5
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@@ -11529,28 +11529,28 @@
 ; LMULMAX1-RV32-NEXT: addi sp, sp, -32
 ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: addi a6, a0, 16
 ; LMULMAX1-RV32-NEXT: vle64.v v26, (a6)
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: sw zero, 28(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 20(sp)
-; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT: addi a7, zero, 32
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
+; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
 ; LMULMAX1-RV32-NEXT: lui a2, 349525
 ; LMULMAX1-RV32-NEXT: addi a5, a2, 1365
 ; LMULMAX1-RV32-NEXT: lui a2, 209715
 ; LMULMAX1-RV32-NEXT: addi a4, a2, 819
 ; LMULMAX1-RV32-NEXT: lui a2, 61681
 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241
-; LMULMAX1-RV32-NEXT: lui a3, 4112
-; LMULMAX1-RV32-NEXT: addi a7, zero, 32
-; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
-; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
-; LMULMAX1-RV32-NEXT: addi a3, a3, 257
-; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2
+; LMULMAX1-RV32-NEXT: lui a2, 4112
+; LMULMAX1-RV32-NEXT: addi a3, a2, 257
+; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
+; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
 ; LMULMAX1-RV32-NEXT: # %bb.1:
-; LMULMAX1-RV32-NEXT: srli a2, a1, 1
-; LMULMAX1-RV32-NEXT: or a1, a1, a2
+; LMULMAX1-RV32-NEXT: srli a1, a2, 1
+; LMULMAX1-RV32-NEXT: or a1, a2, a1
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@@ -11575,8 +11575,8 @@
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 32
 ; LMULMAX1-RV32-NEXT: j .LBB7_3
 ; LMULMAX1-RV32-NEXT: .LBB7_2:
-; LMULMAX1-RV32-NEXT: srli a1, a2, 1
-; LMULMAX1-RV32-NEXT: or a1, a2, a1
+; LMULMAX1-RV32-NEXT: srli a2, a1, 1
+; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@@ -11660,15 +11660,14 @@
 ; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
-; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
-; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
-; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_8
+; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
+; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
 ; LMULMAX1-RV32-NEXT: # %bb.7:
-; LMULMAX1-RV32-NEXT: srli a2, a1, 1
-; LMULMAX1-RV32-NEXT: or a1, a1, a2
+; LMULMAX1-RV32-NEXT: srli a1, a2, 1
+; LMULMAX1-RV32-NEXT: or a1, a2, a1
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@@ -11693,8 +11692,8 @@
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 32
 ; LMULMAX1-RV32-NEXT: j .LBB7_9
 ; LMULMAX1-RV32-NEXT: .LBB7_8:
-; LMULMAX1-RV32-NEXT: srli a1, a2, 1
-; LMULMAX1-RV32-NEXT: or a1, a2, a1
+; LMULMAX1-RV32-NEXT: srli a2, a1, 1
+; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2
 ; LMULMAX1-RV32-NEXT: or a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -2195,10 +2195,10 @@
 ; LMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16
 ; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX2-RV64-NEXT: addi a1, zero, 1
-; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e32,m1,ta,mu
+; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
 ; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 3
 ; LMULMAX2-RV64-NEXT: vmv.x.s a2, v26
+; LMULMAX2-RV64-NEXT: addi a1, zero, 1
 ; LMULMAX2-RV64-NEXT: slli a6, a1, 32
 ; LMULMAX2-RV64-NEXT: or a2, a2, a6
 ; LMULMAX2-RV64-NEXT: addi a3, a2, -1
@@ -2407,10 +2407,10 @@
 ; LMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16
 ; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: addi a1, zero, 1
-; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 3
 ; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
+; LMULMAX1-RV64-NEXT: addi a1, zero, 1
 ; LMULMAX1-RV64-NEXT: slli a6, a1, 32
 ; LMULMAX1-RV64-NEXT: or a2, a2, a6
 ; LMULMAX1-RV64-NEXT: addi a3, a2, -1
@@ -2537,24 +2537,24 @@
 ; LMULMAX2-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
 ; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
-; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
+; LMULMAX2-RV32-NEXT: addi a6, zero, 32
+; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
+; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
 ; LMULMAX2-RV32-NEXT: lui a1, 349525
 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
 ; LMULMAX2-RV32-NEXT: lui a1, 209715
 ; LMULMAX2-RV32-NEXT: addi a3, a1, 819
 ; LMULMAX2-RV32-NEXT: lui a1, 61681
 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a2, a1, 257
-; LMULMAX2-RV32-NEXT: addi a6, zero, 32
-; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
-; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
-; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
+; LMULMAX2-RV32-NEXT: lui a2, 4112
+; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
+; LMULMAX2-RV32-NEXT: addi a2, a2, 257
+; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2
 ; LMULMAX2-RV32-NEXT: # %bb.1:
-; LMULMAX2-RV32-NEXT: addi a5, a1, -1
-; LMULMAX2-RV32-NEXT: not a1, a1
-; LMULMAX2-RV32-NEXT: and a1, a1, a5
+; LMULMAX2-RV32-NEXT: addi a1, a5, -1
+; LMULMAX2-RV32-NEXT: not a5, a5
+; LMULMAX2-RV32-NEXT: and a1, a5, a1
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT: and a5, a5, a4
 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@@ -2570,9 +2570,9 @@
 ; LMULMAX2-RV32-NEXT: addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT: j .LBB3_3
 ; LMULMAX2-RV32-NEXT: .LBB3_2:
-; LMULMAX2-RV32-NEXT: addi a1, a5, -1
-; LMULMAX2-RV32-NEXT: not a5, a5
-; LMULMAX2-RV32-NEXT: and a1, a5, a1
+; LMULMAX2-RV32-NEXT: addi a5, a1, -1
+; LMULMAX2-RV32-NEXT: not a1, a1
+; LMULMAX2-RV32-NEXT: and a1, a1, a5
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT: and a5, a5, a4
 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@@ -2718,24 +2718,24 @@
 ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
-; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
+; LMULMAX1-RV32-NEXT: addi a6, zero, 32
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
+; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
 ; LMULMAX1-RV32-NEXT: lui a1, 349525
 ; LMULMAX1-RV32-NEXT: addi a4, a1, 1365
 ; LMULMAX1-RV32-NEXT: lui a1, 209715
 ; LMULMAX1-RV32-NEXT: addi a3, a1, 819
 ; LMULMAX1-RV32-NEXT: lui a1, 61681
 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a2, a1, 257
-; LMULMAX1-RV32-NEXT: addi a6, zero, 32
-; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
-; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
-; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
+; LMULMAX1-RV32-NEXT: lui a2, 4112
+; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
+; LMULMAX1-RV32-NEXT: addi a2, a2, 257
+; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2
 ; LMULMAX1-RV32-NEXT: # %bb.1:
-; LMULMAX1-RV32-NEXT: addi a5, a1, -1
-; LMULMAX1-RV32-NEXT: not a1, a1
-; LMULMAX1-RV32-NEXT: and a1, a1, a5
+; LMULMAX1-RV32-NEXT: addi a1, a5, -1
+; LMULMAX1-RV32-NEXT: not a5, a5
+; LMULMAX1-RV32-NEXT: and a1, a5, a1
 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX1-RV32-NEXT: and a5, a5, a4
 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@@ -2751,9 +2751,9 @@
 ; LMULMAX1-RV32-NEXT: addi a5, a1, 32
 ; LMULMAX1-RV32-NEXT: j .LBB3_3
 ; LMULMAX1-RV32-NEXT: .LBB3_2:
-; LMULMAX1-RV32-NEXT: addi a1, a5, -1
-; LMULMAX1-RV32-NEXT: not a5, a5
-; LMULMAX1-RV32-NEXT: and a1, a5, a1
+; LMULMAX1-RV32-NEXT: addi a5, a1, -1
+; LMULMAX1-RV32-NEXT: not a1, a1
+; LMULMAX1-RV32-NEXT: and a1, a1, a5
 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX1-RV32-NEXT: and a5, a5, a4
 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@@ -7060,10 +7060,10 @@
 ; LMULMAX2-RV64-NEXT: andi sp, sp, -32
 ; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu
 ; LMULMAX2-RV64-NEXT: vle32.v v26, (a0)
-; LMULMAX2-RV64-NEXT: addi a1, zero, 1
-; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e32,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
 ; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 7
 ; LMULMAX2-RV64-NEXT: vmv.x.s a2, v28
+; LMULMAX2-RV64-NEXT: addi a1, zero, 1
 ; LMULMAX2-RV64-NEXT: slli a6, a1, 32
 ; LMULMAX2-RV64-NEXT: or a2, a2, a6
 ; LMULMAX2-RV64-NEXT: addi a3, a2, -1
@@ -7646,24 +7646,24 @@
 ; LMULMAX2-RV32-NEXT: sw zero, 20(sp)
 ; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
 ; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
-; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
+; LMULMAX2-RV32-NEXT: addi a6, zero, 32
+; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
+; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
 ; LMULMAX2-RV32-NEXT: lui a1, 349525
 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
 ; LMULMAX2-RV32-NEXT: lui a1, 209715
 ; LMULMAX2-RV32-NEXT: addi a3, a1, 819
 ; LMULMAX2-RV32-NEXT: lui a1, 61681
 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a2, a1, 257
-; LMULMAX2-RV32-NEXT: addi a6, zero, 32
-; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
-; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
-; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
+; LMULMAX2-RV32-NEXT: lui a2, 4112
+; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
+; LMULMAX2-RV32-NEXT: addi a2, a2, 257
+; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2
 ; LMULMAX2-RV32-NEXT: # %bb.1:
-; LMULMAX2-RV32-NEXT: addi a5, a1, -1
-; LMULMAX2-RV32-NEXT: not a1, a1
-; LMULMAX2-RV32-NEXT: and a1, a1, a5
+; LMULMAX2-RV32-NEXT: addi a1, a5, -1
+; LMULMAX2-RV32-NEXT: not a5, a5
+; LMULMAX2-RV32-NEXT: and a1, a5, a1
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT: and a5, a5, a4
 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@@ -7679,9 +7679,9 @@
 ; LMULMAX2-RV32-NEXT: addi a5, a1, 32
 ; LMULMAX2-RV32-NEXT: j .LBB7_3
 ; LMULMAX2-RV32-NEXT: .LBB7_2:
-; LMULMAX2-RV32-NEXT: addi a1, a5, -1
-; LMULMAX2-RV32-NEXT: not a5, a5
-; LMULMAX2-RV32-NEXT: and a1, a5, a1
+; LMULMAX2-RV32-NEXT: addi a5, a1, -1
+; LMULMAX2-RV32-NEXT: not a1, a1
+; LMULMAX2-RV32-NEXT: and a1, a1, a5
 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1
 ; LMULMAX2-RV32-NEXT: and a5, a5, a4
 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@@ -7961,29 +7961,29 @@
 ; LMULMAX1-RV32-NEXT: addi sp, sp, -32
 ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: addi a6, a0, 16
 ; LMULMAX1-RV32-NEXT: vle64.v v26, (a6)
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: sw zero, 28(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 20(sp)
-; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
+; LMULMAX1-RV32-NEXT: addi a7, zero, 32
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
+; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
 ; LMULMAX1-RV32-NEXT: lui a2, 349525
 ; LMULMAX1-RV32-NEXT: addi a5, a2, 1365
 ; LMULMAX1-RV32-NEXT: lui a2, 209715
 ; LMULMAX1-RV32-NEXT: addi a4, a2, 819
 ; LMULMAX1-RV32-NEXT: lui a2, 61681
 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241
-; LMULMAX1-RV32-NEXT: lui a2, 4112
-; LMULMAX1-RV32-NEXT: addi a3, a2, 257
-; LMULMAX1-RV32-NEXT: addi a7, zero, 32
-; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
-; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
-; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
+; LMULMAX1-RV32-NEXT: lui a3, 4112
+; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
+; LMULMAX1-RV32-NEXT: addi a3, a3, 257
+; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2
 ; LMULMAX1-RV32-NEXT: # %bb.1:
-; LMULMAX1-RV32-NEXT: addi a1, a2, -1
-; LMULMAX1-RV32-NEXT: not a2, a2
-; LMULMAX1-RV32-NEXT: and a1, a2, a1
+; LMULMAX1-RV32-NEXT: addi a2, a1, -1
+; LMULMAX1-RV32-NEXT: not a1, a1
+; LMULMAX1-RV32-NEXT: and a1, a1, a2
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT: and a2, a2, a5
 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@@ -7999,9 +7999,9 @@
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 32
 ; LMULMAX1-RV32-NEXT: j .LBB7_3
 ; LMULMAX1-RV32-NEXT: .LBB7_2:
-; LMULMAX1-RV32-NEXT: addi a2, a1, -1
-; LMULMAX1-RV32-NEXT: not a1, a1
-; LMULMAX1-RV32-NEXT: and a1, a1, a2
+; LMULMAX1-RV32-NEXT: addi a1, a2, -1
+; LMULMAX1-RV32-NEXT: not a2, a2
+; LMULMAX1-RV32-NEXT: and a1, a2, a1
 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1
 ; LMULMAX1-RV32-NEXT: and a2, a2, a5
 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@@ -8060,10 +8060,9 @@
 ; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
 ; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
-; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
+; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
 ; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
 ; LMULMAX1-RV32-NEXT: # %bb.7:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -153,12 +153,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v9, v25
+; LMULMAX1-NEXT: vsext.vf8 v9, v26
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v4i8_v4i64:
@@ -178,12 +177,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v9, v25
+; LMULMAX1-NEXT: vzext.vf8 v9, v26
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v4i8_v4i64:
@@ -229,12 +227,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsext.vf4 v9, v26
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v8i8_v8i32:
@@ -254,12 +251,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vzext.vf4 v9, v26
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v8i8_v8i32:
@@ -279,8 +275,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -290,9 +284,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf8 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v9, v25
+; LMULMAX1-NEXT: vsext.vf8 v9, v26
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v8i8_v8i64:
@@ -312,8 +307,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -323,9 +316,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf8 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v9, v25
+; LMULMAX1-NEXT: vzext.vf8 v9, v26
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v8i8_v8i64:
@@ -345,12 +339,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: vsext.vf2 v9, v26
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i8_v16i16:
@@ -370,12 +363,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: vzext.vf2 v9, v26
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i8_v16i16:
@@ -395,8 +387,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
@@ -406,9 +396,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsext.vf4 v9, v26
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i8_v16i32:
@@ -428,8 +419,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
@@ -439,9 +428,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf4 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vzext.vf4 v9, v26
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i8_v16i32:
@@ -461,8 +451,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -480,29 +468,29 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf8 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v10, v25
+; LMULMAX1-NEXT: vsext.vf8 v10, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v11, v25
+; LMULMAX1-NEXT: vsext.vf8 v11, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf8 v13, v25
+; LMULMAX1-NEXT: vsext.vf8 v13, v26
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i8_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX4-NEXT: vle8.v v25, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf8 v8, v25
 ; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX4-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf8 v12, v25
+; LMULMAX4-NEXT: vsext.vf8 v12, v26
+; LMULMAX4-NEXT: vsext.vf8 v8, v25
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i8>, <16 x i8>* %x
 %z = sext <16 x i8> %y to <16 x i64>
@@ -514,8 +502,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -533,29 +519,29 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf8 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v10, v25
+; LMULMAX1-NEXT: vzext.vf8 v10, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v11, v25
+; LMULMAX1-NEXT: vzext.vf8 v11, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf8 v13, v25
+; LMULMAX1-NEXT: vzext.vf8 v13, v26
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i8_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX4-NEXT: vle8.v v25, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf8 v8, v25
 ; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX4-NEXT: vslidedown.vi v26, v25, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf8 v12, v25
+; LMULMAX4-NEXT: vzext.vf8 v12, v26
+; LMULMAX4-NEXT: vzext.vf8 v8, v25
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i8>, <16 x i8>* %x
 %z = zext <16 x i8> %y to <16 x i64>
@@ -692,12 +678,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsext.vf4 v9, v26
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v4i16_v4i64:
@@ -717,12 +702,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vzext.vf4 v9, v26
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v4i16_v4i64:
@@ -755,12 +739,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: vsext.vf2 v9, v26
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v8i16_v8i32:
@@ -780,12 +763,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: vzext.vf2 v9, v26
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v8i16_v8i32:
@@ -805,8 +787,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -816,9 +796,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsext.vf4 v9, v26
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v8i16_v8i64:
@@ -838,8 +819,6 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -849,9 +828,10 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf4 v11, v26
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vzext.vf4 v9, v26
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v8i16_v8i64:
@@ -901,17 +881,16 @@
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle16.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v25
-; LMULMAX1-NEXT: vsext.vf2 v10, v26
+; LMULMAX1-NEXT: vsext.vf2 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v11, v25
+; LMULMAX1-NEXT: vsext.vf2 v11, v27
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsext.vf2 v10, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i16_v16i32:
@@ -933,17 +912,16 @@
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle16.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v25
-; LMULMAX1-NEXT: vzext.vf2 v10, v26
+; LMULMAX1-NEXT: vzext.vf2 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v11, v25
+; LMULMAX1-NEXT: vzext.vf2 v11, v27
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vzext.vf2 v10, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i16_v16i32:
@@ -965,8 +943,6 @@
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle16.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -975,7 +951,6 @@
 ; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v11, v27
-; LMULMAX1-NEXT: vsext.vf4 v12, v26
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -985,25 +960,26 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v15, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsext.vf4 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v13, v25
+; LMULMAX1-NEXT: vsext.vf4 v13, v27
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsext.vf4 v12, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i16_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
 ; LMULMAX4-NEXT: vle16.v v26, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf4 v8, v26
 ; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
+; LMULMAX4-NEXT: vslidedown.vi v28, v26, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf4 v12, v26
+; LMULMAX4-NEXT: vsext.vf4 v12, v28
+; LMULMAX4-NEXT: vsext.vf4 v8, v26
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i16>, <16 x i16>* %x
 %z = sext <16 x i16> %y to <16 x i64>
@@ -1017,8 +993,6 @@
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle16.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -1027,7 +1001,6 @@
 ; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf4 v11, v27
-; LMULMAX1-NEXT: vzext.vf4 v12, v26
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@@ -1037,25 +1010,26 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf4 v15, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vzext.vf4 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v13, v25
+; LMULMAX1-NEXT: vzext.vf4 v13, v27
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vzext.vf4 v12, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i16_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
 ; LMULMAX4-NEXT: vle16.v v26, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf4 v8, v26
 ; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
+; LMULMAX4-NEXT: vslidedown.vi v28, v26, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf4 v12, v26
+; LMULMAX4-NEXT: vzext.vf4 v12, v28
+; LMULMAX4-NEXT: vzext.vf4 v8, v26
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i16>, <16 x i16>* %x
 %z = zext <16 x i16> %y to <16 x i64>
@@ -1149,12 +1123,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vle32.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: vsext.vf2 v9, v26
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v4i32_v4i64:
@@ -1174,12 +1147,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vle32.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: vzext.vf2 v9, v26
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v4i32_v4i64:
@@ -1264,17 +1236,16 @@
 ; LMULMAX1-NEXT: vle32.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle32.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v25
-; LMULMAX1-NEXT: vsext.vf2 v10, v26
+; LMULMAX1-NEXT: vsext.vf2 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v11, v25
+; LMULMAX1-NEXT: vsext.vf2 v11, v27
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsext.vf2 v10, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v8i32_v8i64:
@@ -1296,17 +1267,16 @@
 ; LMULMAX1-NEXT: vle32.v v25, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle32.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v25
-; LMULMAX1-NEXT: vzext.vf2 v10, v26
+; LMULMAX1-NEXT: vzext.vf2 v9, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v11, v25
+; LMULMAX1-NEXT: vzext.vf2 v11, v27
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vzext.vf2 v10, v26
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v8i32_v8i64:
@@ -1424,39 +1394,37 @@
 ; LMULMAX1-NEXT: vle32.v v27, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle32.v v28, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v8, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v9, v27
-; LMULMAX1-NEXT: vsext.vf2 v10, v28
+; LMULMAX1-NEXT: vsext.vf2 v9, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v28, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v11, v27
-; LMULMAX1-NEXT: vsext.vf2 v12, v26
+; LMULMAX1-NEXT: vsext.vf2 v11, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v13, v26
-; LMULMAX1-NEXT: vsext.vf2 v14, v25
+; LMULMAX1-NEXT: vsext.vf2 v13, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf2 v15, v25
+; LMULMAX1-NEXT: vsext.vf2 v15, v29
+; LMULMAX1-NEXT: vsext.vf2 v8, v27
+; LMULMAX1-NEXT: vsext.vf2 v10, v28
+; LMULMAX1-NEXT: vsext.vf2 v12, v26
+; LMULMAX1-NEXT: vsext.vf2 v14, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: sextload_v16i32_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
 ; LMULMAX4-NEXT: vle32.v v28, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf2 v8, v28
 ; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
+; LMULMAX4-NEXT: vslidedown.vi v8, v28, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vsext.vf2 v12, v28
+; LMULMAX4-NEXT: vsext.vf2 v12, v8
+; LMULMAX4-NEXT: vsext.vf2 v8, v28
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i32>, <16 x i32>* %x
 %z = sext <16 x i32> %y to <16 x i64>
@@ -1474,39 +1442,37 @@
 ; LMULMAX1-NEXT: vle32.v v27, (a0)
 ; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vle32.v v28, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v8, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v9, v27
-; LMULMAX1-NEXT: vzext.vf2 v10, v28
+; LMULMAX1-NEXT: vzext.vf2 v9, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v28, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v11, v27
-; LMULMAX1-NEXT: vzext.vf2 v12, v26
+; LMULMAX1-NEXT: vzext.vf2 v11, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v13, v26
-; LMULMAX1-NEXT: vzext.vf2 v14, v25
+; LMULMAX1-NEXT: vzext.vf2 v13, v29
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v29, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf2 v15, v25
+; LMULMAX1-NEXT: vzext.vf2 v15, v29
+; LMULMAX1-NEXT: vzext.vf2 v8, v27
+; LMULMAX1-NEXT: vzext.vf2 v10, v28
+; LMULMAX1-NEXT: vzext.vf2 v12, v26
+; LMULMAX1-NEXT: vzext.vf2 v14, v25
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: zextload_v16i32_v16i64:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
 ; LMULMAX4-NEXT: vle32.v v28, (a0)
-; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf2 v8, v28
 ; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
-; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
+; LMULMAX4-NEXT: vslidedown.vi v8, v28, 8
 ; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; LMULMAX4-NEXT: vzext.vf2 v12, v28
+; LMULMAX4-NEXT: vzext.vf2 v12, v8
+; LMULMAX4-NEXT: vzext.vf2 v8, v28
 ; LMULMAX4-NEXT: ret
 %y = load <16 x i32>, <16 x i32>* %x
 %z = zext <16 x i32> %y to <16 x i64>
@@ -2013,19 +1979,19 @@
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vnsrl.wi v26, v12, 0
 ; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v30, v26, 0
 ; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
 ; LMULMAX4-NEXT: vmv.v.i v26, 0
 ; LMULMAX4-NEXT: vsetivli a1, 8, e16,m2,tu,mu
-; LMULMAX4-NEXT: vslideup.vi v26, v28, 0
-; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
-; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX4-NEXT: vnsrl.wi v30, v28, 0
+; LMULMAX4-NEXT: vslideup.vi v26, v30, 0
 ; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,tu,mu
-; LMULMAX4-NEXT: vslideup.vi v26, v30, 8
+; LMULMAX4-NEXT: vslideup.vi v26, v28, 8
 ; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
 ; LMULMAX4-NEXT: vse16.v v26, (a0)
 ; LMULMAX4-NEXT: ret
@@ -2087,13 +2053,12 @@
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX4-NEXT: vnsrl.wi v28, v8, 0
+; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
+; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0
 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
 ; LMULMAX4-NEXT: vmv.v.i v8, 0
 ; LMULMAX4-NEXT: vsetivli a1, 8, e32,m4,tu,mu
-; LMULMAX4-NEXT: vslideup.vi v8, v28, 0
-; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
+; LMULMAX4-NEXT: vslideup.vi v8, v12, 0
 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,tu,mu
 ; LMULMAX4-NEXT: vslideup.vi v8, v28, 8
 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -76,11 +76,11 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: addi a0, zero, 32
+; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v26, v25, a0
+; RV32-NEXT: vmv.x.s a1, v26
 ; RV32-NEXT: vmv.x.s a0, v25
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
-; RV32-NEXT: vsrl.vx v25, v25, a1
-; RV32-NEXT: vmv.x.s a1, v25
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: extractelt_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -52,12 +52,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vle16.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v26
+; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v25
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vse32.v v27, (a0)
@@ -91,28 +90,28 @@
 ; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v26
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v27
-; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
-; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v29, v27
+; LMULMAX1-NEXT: vfwcvt.f.f.v v29, v28
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v28
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
+; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.f.f.v v25, v27
+; LMULMAX1-NEXT: vfwcvt.f.f.v v25, v28
 ; LMULMAX1-NEXT: addi a0, a1, 48
 ; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: vse64.v v27, (a0)
 ; LMULMAX1-NEXT: addi a0, a1, 32
 ; LMULMAX1-NEXT: vse64.v v29, (a0)
-; LMULMAX1-NEXT: vse64.v v28, (a1)
+; LMULMAX1-NEXT: vse64.v v25, (a1)
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -1528,10 +1528,10 @@
 ; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
 ; CHECK-NEXT: vle16.v v25, (a0)
 ; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vv v27, v25, v25
-; CHECK-NEXT: vmfeq.vf v25, v26, fa0
+; CHECK-NEXT: vmfeq.vf v27, v26, fa0
+; CHECK-NEXT: vmfeq.vv v26, v25, v25
 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v0, v25, v27
+; CHECK-NEXT: vmand.mm v0, v27, v26
 ; CHECK-NEXT: vmv.v.i v25, 0
 ; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
 ; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
@@ -1556,10 +1556,10 @@
 ; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
 ; CHECK-NEXT: vle16.v v25, (a0)
 ; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vv v27, v25, v25
-; CHECK-NEXT: vmfne.vf v25, v26, fa0
+; CHECK-NEXT: vmfne.vf v27, v26, fa0
+; CHECK-NEXT: vmfne.vv v26, v25, v25
 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v0, v25, v27
+; CHECK-NEXT: vmor.mm v0, v27, v26
 ; CHECK-NEXT: vmv.v.i v25, 0
 ; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
 ; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -111,21 +111,21 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi a0, zero, 1
-; RV32-NEXT: addi a1, zero, 8
-; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
-; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: vmv.v.i v28, 0
-; RV32-NEXT: vsetivli a0, 4, e16,m1,tu,mu
-; RV32-NEXT: vslideup.vi v28, v25, 3
 ; RV32-NEXT: lui a0, %hi(.LCPI6_0)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0)
 ; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
 ; RV32-NEXT: vle16.v v25, (a0)
 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
 ; RV32-NEXT: vrgatherei16.vv v26, v8, v25
+; RV32-NEXT: addi a0, zero, 8
+; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: addi a0, zero, 1
+; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v25, a0
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vsetivli a0, 4, e16,m1,tu,mu
+; RV32-NEXT: vslideup.vi v28, v25, 3
 ; RV32-NEXT: vsetivli a0, 4, e64,m2,tu,mu
 ; RV32-NEXT: vrgatherei16.vv v26, v10, v28, v0.t
 ; RV32-NEXT: vmv2r.v v8, v26
@@ -139,14 +139,14 @@
 ; RV64-NEXT: vmv.v.i v28, 0
 ; RV64-NEXT: vsetivli a0, 4, e64,m2,tu,mu
 ; RV64-NEXT: vslideup.vi v28, v26, 3
-; RV64-NEXT: addi a0, zero, 8
-; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: lui a0, %hi(.LCPI6_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0)
 ; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
 ; RV64-NEXT: vle64.v v30, (a0)
 ; RV64-NEXT: vrgather.vv v26, v8, v30
+; RV64-NEXT: addi a0, zero, 8
+; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vsetivli a0, 4, e64,m2,tu,mu
 ; RV64-NEXT: vrgather.vv v26, v10, v28, v0.t
 ; RV64-NEXT: vmv2r.v v8, v26
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
@@ -160,8 +160,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: vse16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse16.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <16 x half> undef, half 0.0, i32 0
@@ -182,8 +182,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v25, (a1)
+; LMULMAX1-NEXT: vse32.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse32.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <8 x float> undef, float 0.0, i32 0
@@ -204,8 +204,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse64.v v25, (a1)
+; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse64.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <4 x double> undef, double 0.0, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -134,17 +134,16 @@
 ; LMULMAX1-NEXT: addi a2, a0, 16
 ; LMULMAX1-NEXT: vle32.v v25, (a2)
 ; LMULMAX1-NEXT: vle32.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v27, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v28, v25
-; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v25, v26
+; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v28, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v29, v26
+; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v29, v27
+; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v27, v25
+; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v25, v26
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vse64.v v29, (a0)
@@ -176,17 +175,16 @@
 ; LMULMAX1-NEXT: addi a2, a0, 16
 ; LMULMAX1-NEXT: vle32.v v25, (a2)
 ; LMULMAX1-NEXT: vle32.v v26, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v27, v25
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v28, v25
-; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v25, v26
+; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v28, v27
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v29, v26
+; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v29, v27
+; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v27, v25
+; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v25, v26
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vse64.v v29, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -138,23 +138,23 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v27, v26
 ; LMULMAX1-NEXT: vfcvt.f.x.v v26, v27
-; LMULMAX1-NEXT: vsext.vf4 v27, v25
-; LMULMAX1-NEXT: vfcvt.f.x.v v27, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v28, v25
+; LMULMAX1-NEXT: vsext.vf4 v28, v27
 ; LMULMAX1-NEXT: vfcvt.f.x.v v28, v28
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v29, v27
+; LMULMAX1-NEXT: vfcvt.f.x.v v27, v29
 ; LMULMAX1-NEXT: vsext.vf4 v29, v25
 ; LMULMAX1-NEXT: vfcvt.f.x.v v25, v29
 ; LMULMAX1-NEXT: addi a0, a1, 48
-; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: vse64.v v27, (a0)
 ; LMULMAX1-NEXT: addi a0, a1, 32
 ; LMULMAX1-NEXT: vse64.v v28, (a0)
-; LMULMAX1-NEXT: vse64.v v27, (a1)
+; LMULMAX1-NEXT: vse64.v v25, (a1)
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-NEXT: ret
@@ -184,23 +184,23 @@
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
 ; LMULMAX1-NEXT: vzext.vf4 v27, v26
 ; LMULMAX1-NEXT: vfcvt.f.xu.v v26, v27
-; LMULMAX1-NEXT: vzext.vf4 v27, v25
-; LMULMAX1-NEXT: vfcvt.f.xu.v v27, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
-; LMULMAX1-NEXT: vzext.vf4 v28, v25
+; LMULMAX1-NEXT: vzext.vf4 v28, v27
 ; LMULMAX1-NEXT: vfcvt.f.xu.v v28, v28
 ; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
 ; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v29, v27
+; LMULMAX1-NEXT: vfcvt.f.xu.v v27, v29
 ; LMULMAX1-NEXT: vzext.vf4 v29, v25
 ; LMULMAX1-NEXT: vfcvt.f.xu.v v25, v29
 ; LMULMAX1-NEXT: addi a0, a1, 48
-; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: vse64.v v27, (a0)
 ; LMULMAX1-NEXT: addi a0, a1, 32
 ; LMULMAX1-NEXT: vse64.v v28, (a0)
-; LMULMAX1-NEXT: vse64.v v27, (a1)
+; LMULMAX1-NEXT: vse64.v v25, (a1)
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vse64.v v26, (a0)
 ; LMULMAX1-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -48,10 +48,10 @@
 ; RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu
 ; RV32-NEXT: vmv.v.i v28, 0
 ; RV32-NEXT: vsetivli a3, 2, e64,m2,tu,mu
-; RV32-NEXT: vslideup.vi v28, v26, 0
 ; RV32-NEXT: lw a3, 20(a0)
-; RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu
 ; RV32-NEXT: lw a4, 16(a0)
+; RV32-NEXT: vslideup.vi v28, v26, 0
+; RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
 ; RV32-NEXT: vmv.v.x v26, a3
 ; RV32-NEXT: vmv.s.x v26, a4
 ; RV32-NEXT: vsetivli a3, 4, e64,m2,tu,mu
@@ -62,10 +62,10 @@
 ; RV32-NEXT: vslide1up.vx v26, v30, a1
 ; RV32-NEXT: vsetivli a3, 3, e64,m2,tu,mu
 ; RV32-NEXT: vslideup.vi v28, v26, 2
-; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu
-; RV32-NEXT: vse64.v v28, (a0)
 ; RV32-NEXT: sw a1, 16(a0)
 ; RV32-NEXT: sw a2, 20(a0)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vse64.v v28, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_v3i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -265,6 +265,8 @@ define void @buildvec_seq_v9i8(<9 x i8>* %x) {
 ; RV32-LABEL: buildvec_seq_v9i8:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, 3
+; RV32-NEXT: sb a1, 8(a0)
 ; RV32-NEXT: addi a1, zero, 73
 ; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
 ; RV32-NEXT: vmv.s.x v0, a1
@@ -277,8 +279,6 @@
 ; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu
 ; RV32-NEXT: vmerge.vim v25, v25, 3, v0
 ; RV32-NEXT: vse8.v v25, (a0)
-; RV32-NEXT: addi a1, zero, 3
-; RV32-NEXT: sb a1, 8(a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: buildvec_seq_v9i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -59,12 +59,11 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a2, 8, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vle8.v v25, (a0)
-; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v26, v25
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v27, v25
+; LMULMAX1-NEXT: vsext.vf4 v27, v26
+; LMULMAX1-NEXT: vsext.vf4 v26, v25
 ; LMULMAX1-NEXT: addi a0, a1, 16
 ; LMULMAX1-NEXT: vse32.v v27, (a0)
 ; LMULMAX1-NEXT: vse32.v v26, (a1)
@@ -126,24 +125,24 @@
 ; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vsext.vf4 v29, v27
-; LMULMAX1-NEXT: vsext.vf4 v27, v25
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 8
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v30, v25
+; LMULMAX1-NEXT: vsext.vf4 v30, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v31, v25
-; LMULMAX1-NEXT: vsext.vf4 v25, v26
+; LMULMAX1-NEXT: vsext.vf4 v31, v27
 ; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 8
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 8
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v8, v26
+; LMULMAX1-NEXT: vsext.vf4 v8, v27
 ; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 4
 ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT: vsext.vf4 v9, v26
+; LMULMAX1-NEXT: vsext.vf4 v9, v27
+; LMULMAX1-NEXT: vsext.vf4 v27, v25
+; LMULMAX1-NEXT: vsext.vf4 v25, v26
 ; LMULMAX1-NEXT: addi a0, a1, 48
 ; LMULMAX1-NEXT: vse32.v v9, (a0)
 ; LMULMAX1-NEXT: addi a0, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -91,14 +91,14 @@
 ; CHECK-NEXT: vmv.v.i v26, 0
 ; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu
 ; CHECK-NEXT: vslideup.vi v26, v25, 3
-; CHECK-NEXT: addi a0, zero, 8
-; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
 ; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
 ; CHECK-NEXT: vle16.v v27, (a0)
 ; CHECK-NEXT: vrgather.vv v25, v8, v27
+; CHECK-NEXT: addi a0, zero, 8
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu
 ; CHECK-NEXT: vrgather.vv v25, v9, v26, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v25
@@ -211,15 +211,15 @@
 ; RV32-NEXT: vmerge.vim v26, v26, 2, v0
 ; RV32-NEXT: vsetivli a0, 8, e16,m1,tu,mu
 ; RV32-NEXT: vslideup.vi v26, v25, 7
-; RV32-NEXT: addi a0, zero, 164
-; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: lui a0, %hi(.LCPI11_0)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
 ; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT: vle16.v v25, (a0)
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT: vrgatherei16.vv v28, v8, v25
+; RV32-NEXT: addi a0, zero, 164
+; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
 ; RV32-NEXT: vrgatherei16.vv v28, v12, v26, v0.t
 ; RV32-NEXT: vmv4r.v v8, v28
@@ -238,14 +238,14 @@
 ; RV64-NEXT: vmerge.vim v16, v16, 2, v0
 ; RV64-NEXT: vsetivli a0, 8, e64,m4,tu,mu
 ; RV64-NEXT: vslideup.vi v16, v28, 7
-; RV64-NEXT: addi a0, zero, 164
-; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: lui a0, %hi(.LCPI11_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0)
 ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV64-NEXT: vle64.v v20, (a0)
 ; RV64-NEXT: vrgather.vv v28, v8, v20
+; RV64-NEXT: addi a0, zero, 164
+; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vsetivli a0, 8, e64,m4,tu,mu
 ; RV64-NEXT: vrgather.vv v28, v12, v16, v0.t
 ; RV64-NEXT: vmv4r.v v8, v28
@@ -267,9 +267,6 @@
 ; RV32-NEXT: vslideup.vi v27, v26, 5
 ; RV32-NEXT: vsetivli a0, 7, e16,m1,tu,mu
 ; RV32-NEXT: vslideup.vi v27, v25, 6
-; RV32-NEXT: addi a0, zero, 113
-; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: lui a0, %hi(.LCPI12_0)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0)
 ; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
@@ -278,6 +275,9 @@
 ; RV32-NEXT: vmv.v.i v12, -1
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
 ; RV32-NEXT: vrgatherei16.vv v28, v12, v25
+; RV32-NEXT: addi a0, zero, 113
+; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
 ; RV32-NEXT: vrgatherei16.vv v28, v8, v27, v0.t
 ; RV32-NEXT: vmv4r.v v8, v28
@@ -311,24 +311,24 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 ; RV32-LABEL: vrgather_shuffle_vx_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi a0, zero, 140
-; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: lui a0, %hi(.LCPI13_0)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
 ; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: vmv4r.v v28, v8
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; RV32-NEXT: vrgatherei16.vv v28, v8, v25
+; RV32-NEXT: vrgatherei16.vv v8, v28, v25
+; RV32-NEXT: addi a0, zero, 140
+; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: lui a0, %hi(.LCPI13_1)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1)
 ; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; RV32-NEXT: vle16.v v25, (a0)
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
-; RV32-NEXT: vmv.v.i v8, 5
+; RV32-NEXT: vmv.v.i v28, 5
 ; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
-; RV32-NEXT: vrgatherei16.vv v28, v8, v25, v0.t
-; RV32-NEXT: vmv4r.v v8, v28
+; RV32-NEXT: vrgatherei16.vv v8, v28, v25, v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vrgather_shuffle_vx_v8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -366,8 +366,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: vse8.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse8.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <32 x i8> undef, i8 0, i32 0
@@ -395,8 +395,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: vse16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse16.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <16 x i16> undef, i16 0, i32 0
@@ -424,8 +424,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, 0
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v25, (a1)
+; LMULMAX1-NEXT: vse32.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse32.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <8 x i32> undef, i32 0, i32 0
@@ -453,8 +453,8 @@
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-RV32-NEXT: vmv.v.i v25, 0
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a0, a0, 16
 ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: ret
 ;
@@ -476,8 +476,8 @@
 ; LMULMAX1-RV64: # %bb.0:
 ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.v.i v25, 0
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a0, a0, 16
 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT: ret
 %a = insertelement <4 x i64> undef, i64 0, i32 0
@@ -594,8 +594,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, -1
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: vse8.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse8.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <32 x i8> undef, i8 -1, i32 0
@@ -623,8 +623,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, -1
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: vse16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse16.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <16 x i16> undef, i16 -1, i32 0
@@ -652,8 +652,8 @@
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT: vmv.v.i v25, -1
-; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v25, (a1)
+; LMULMAX1-NEXT: vse32.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
 ; LMULMAX1-NEXT: vse32.v v25, (a0)
 ; LMULMAX1-NEXT: ret
 %a = insertelement <8 x i32> undef, i32 -1, i32 0
@@ -681,8 +681,8 @@
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-RV32-NEXT: vmv.v.i v25, -1
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a0, a0, 16
 ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
 ; LMULMAX1-RV32-NEXT: ret
 ;
@@ -704,8 +704,8 @@
 ; LMULMAX1-RV64: # %bb.0:
 ; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; LMULMAX1-RV64-NEXT: vmv.v.i v25, -1
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a0, a0, 16
 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT: ret
 %a = insertelement <4 x i64> undef, i64 -1, i32 0
@@ -798,13 +798,14 @@
 ; LMULMAX8-RV32: # %bb.0:
 ; LMULMAX8-RV32-NEXT: vsetivli a4, 16, e64,m8,ta,mu
 ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT: lui a0, 349525
-; LMULMAX8-RV32-NEXT: addi a0, a0, 1365
-; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu
-; LMULMAX8-RV32-NEXT: vmv.s.x v0, a0
 ; LMULMAX8-RV32-NEXT: addi a0, zero, 32
-; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; LMULMAX8-RV32-NEXT: vsetvli a4, a0, e32,m8,ta,mu
 ; LMULMAX8-RV32-NEXT: vmv.v.x v16, a2
+; LMULMAX8-RV32-NEXT: lui a2, 349525
+; LMULMAX8-RV32-NEXT: addi a2, a2, 1365
+; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu
+; LMULMAX8-RV32-NEXT: vmv.s.x v0, a2
+; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu
 ; LMULMAX8-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
 ; LMULMAX8-RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
 ; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1220,18 +1220,18 @@
 ; RV32-NEXT: vmv.s.x v26, a1
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vmulh.vv v26, v25, v26
-; RV32-NEXT: addi a1, zero, 1
-; RV32-NEXT: addi a2, zero, 3
-; RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu
-; RV32-NEXT: vmv.s.x v0, a2
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: addi a1, zero, 3
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
 ; RV32-NEXT: vmv.v.i v27, -1
 ; RV32-NEXT: vmerge.vim v27, v27, 0, v0
-; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vmul.vv v25, v25, v27
 ; RV32-NEXT: vadd.vv v25, v26, v25
-; RV32-NEXT: addi a2, zero, 63
-; RV32-NEXT: vsrl.vx v26, v25, a2
+; RV32-NEXT: addi a1, zero, 63
+; RV32-NEXT: vsrl.vx v26, v25, a1
+; RV32-NEXT: addi a1, zero, 1
 ; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
 ; RV32-NEXT: vmv.s.x v27, a1
 ; RV32-NEXT: vmv.v.i v28, 0
@@ -3994,6 +3994,28 @@
 ; LMULMAX2-RV32-NEXT: addi a1, zero, 32
 ; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
 ; LMULMAX2-RV32-NEXT: vle8.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a2, 66049
+; LMULMAX2-RV32-NEXT: addi a2, a2, 32
+; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
+; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
+; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI129_0)
+; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI129_0)
+; LMULMAX2-RV32-NEXT: vle8.v v28, (a2)
+; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0
+; LMULMAX2-RV32-NEXT: vmerge.vim v8, v30, 1, v0
+; LMULMAX2-RV32-NEXT: vsrl.vv v8, v26, v8
+; LMULMAX2-RV32-NEXT: vmulhu.vv v28, v8, v28
+; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a2, 163907
+; LMULMAX2-RV32-NEXT: addi a2, a2, -2044
+; LMULMAX2-RV32-NEXT:
vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: addi a2, zero, -128 +; LMULMAX2-RV32-NEXT: vsetvli a3, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a2, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 ; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu @@ -4011,30 +4033,8 @@ ; LMULMAX2-RV32-NEXT: addi a2, a2, 304 ; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX2-RV32-NEXT: addi a2, zero, -128 -; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v30, a2, v0 -; LMULMAX2-RV32-NEXT: lui a2, 66049 -; LMULMAX2-RV32-NEXT: addi a2, a2, 32 -; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI129_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI129_0) -; LMULMAX2-RV32-NEXT: vle8.v v10, (a1) -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30 -; LMULMAX2-RV32-NEXT: vmulhu.vv v30, v30, v10 -; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vse8.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -4044,6 +4044,28 @@ ; LMULMAX2-RV64-NEXT: addi a1, zero, 32 ; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV64-NEXT: lui a2, 66049 +; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI129_0) +; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI129_0) +; LMULMAX2-RV64-NEXT: vle8.v v28, (a2) +; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV64-NEXT: vmerge.vim v8, v30, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v26, v8 +; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v8, v28 +; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: addi a2, zero, -128 +; LMULMAX2-RV64-NEXT: vsetvli a3, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a2, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 ; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu @@ -4061,30 +4083,8 @@ ; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 ; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli a2, a1, 
e8,m2,ta,mu -; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0 -; LMULMAX2-RV64-NEXT: addi a2, zero, -128 -; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v30, a2, v0 -; LMULMAX2-RV64-NEXT: lui a2, 66049 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 -; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI129_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI129_0) -; LMULMAX2-RV64-NEXT: vle8.v v10, (a1) -; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v30, v26, v30 -; LMULMAX2-RV64-NEXT: vmulhu.vv v30, v30, v10 -; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v30 -; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8 -; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: vse8.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -4371,65 +4371,65 @@ ; ; LMULMAX1-RV64-LABEL: mulhu_v4i64: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: addi a1, zero, 2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a1) ; LMULMAX1-RV64-NEXT: vmv.v.i v27, 0 -; LMULMAX1-RV64-NEXT: addi a3, zero, -1 -; LMULMAX1-RV64-NEXT: slli a3, a3, 63 -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a3 -; LMULMAX1-RV64-NEXT: lui a3, 1044935 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 455 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 455 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 455 -; LMULMAX1-RV64-NEXT: slli a3, a3, 13 -; LMULMAX1-RV64-NEXT: addi a3, a3, 911 -; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3 -; LMULMAX1-RV64-NEXT: lui a3, 4681 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 585 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 585 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 585 -; LMULMAX1-RV64-NEXT: slli a3, a3, 13 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1171 -; LMULMAX1-RV64-NEXT: vmv.s.x v28, a3 +; LMULMAX1-RV64-NEXT: addi a2, zero, -1 +; LMULMAX1-RV64-NEXT: slli a2, a2, 63 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 +; LMULMAX1-RV64-NEXT: lui a2, 1044935 +; LMULMAX1-RV64-NEXT: addiw a2, a2, 455 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, 455 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, 455 +; LMULMAX1-RV64-NEXT: slli a2, a2, 13 +; LMULMAX1-RV64-NEXT: addi a2, a2, 911 +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a2 +; LMULMAX1-RV64-NEXT: lui a2, 4681 +; LMULMAX1-RV64-NEXT: addiw a2, a2, 585 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, 585 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, 585 +; LMULMAX1-RV64-NEXT: slli a2, a2, 13 +; LMULMAX1-RV64-NEXT: addi a2, a2, 1171 +; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 ; LMULMAX1-RV64-NEXT: vmulhu.vv v28, v26, v28 ; LMULMAX1-RV64-NEXT: vsub.vv v26, v26, v28 ; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v26, v27 ; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 ; LMULMAX1-RV64-NEXT: vmv.v.i v27, 3 -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1 +; LMULMAX1-RV64-NEXT: addi a2, zero, 2 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 ; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v27 ; LMULMAX1-RV64-NEXT: vmv.v.i v27, 2 -; LMULMAX1-RV64-NEXT: addi a1, zero, 1 -; LMULMAX1-RV64-NEXT: vmv.s.x v27, 
a1 -; LMULMAX1-RV64-NEXT: lui a1, 1035469 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -819 -; LMULMAX1-RV64-NEXT: vmv.v.x v28, a1 -; LMULMAX1-RV64-NEXT: lui a1, 1026731 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -1365 -; LMULMAX1-RV64-NEXT: vmv.s.x v28, a1 +; LMULMAX1-RV64-NEXT: addi a2, zero, 1 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 +; LMULMAX1-RV64-NEXT: lui a2, 1035469 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -819 +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a2 +; LMULMAX1-RV64-NEXT: lui a2, 1026731 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a2, a2, 12 +; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 +; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 ; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v28 ; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = udiv <4 x i64> %a, @@ -4443,18 +4443,18 @@ ; LMULMAX2-RV32-NEXT: addi a1, zero, 32 ; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu ; LMULMAX2-RV32-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV32-NEXT: addi a2, zero, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a2 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: addi a2, zero, 57 ; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a2, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vmv.v.i v28, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 -; LMULMAX2-RV32-NEXT: addi a1, zero, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV32-NEXT: addi a1, zero, 57 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v30 ; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vse8.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -4464,18 +4464,18 @@ ; LMULMAX2-RV64-NEXT: addi a1, zero, 32 ; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV64-NEXT: addi a2, zero, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v28, a2 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: addi a2, zero, 57 ; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmerge.vxm v28, v28, a2, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: vmv.v.i v28, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 1, v0 -; LMULMAX2-RV64-NEXT: addi a1, zero, -123 
-; LMULMAX2-RV64-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV64-NEXT: addi a1, zero, 57 -; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30 ; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: vse8.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -94,19 +94,19 @@ ; ; LMULMAX1-RV32-LABEL: splat_zeros_v32i1: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmclr.m v25 -; LMULMAX1-RV32-NEXT: vse1.v v25, (a1) +; LMULMAX1-RV32-NEXT: vse1.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a0, a0, 2 ; LMULMAX1-RV32-NEXT: vse1.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: splat_zeros_v32i1: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: addi a1, a0, 2 -; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; LMULMAX1-RV64-NEXT: vmclr.m v25 -; LMULMAX1-RV64-NEXT: vse1.v v25, (a1) +; LMULMAX1-RV64-NEXT: vse1.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a0, a0, 2 ; LMULMAX1-RV64-NEXT: vse1.v v25, (a0) ; LMULMAX1-RV64-NEXT: ret store <32 x i1> zeroinitializer, <32 x i1>* %x @@ -126,27 +126,27 @@ ; ; LMULMAX1-RV32-LABEL: splat_ones_v64i1: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 6 -; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmset.m v25 +; LMULMAX1-RV32-NEXT: vse1.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a1, a0, 6 ; LMULMAX1-RV32-NEXT: vse1.v v25, (a1) ; LMULMAX1-RV32-NEXT: addi a1, a0, 4 ; LMULMAX1-RV32-NEXT: vse1.v v25, (a1) -; LMULMAX1-RV32-NEXT: addi a1, a0, 2 -; LMULMAX1-RV32-NEXT: vse1.v v25, (a1) +; LMULMAX1-RV32-NEXT: addi a0, a0, 2 ; LMULMAX1-RV32-NEXT: vse1.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: splat_ones_v64i1: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: addi a1, a0, 6 -; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; LMULMAX1-RV64-NEXT: vmset.m v25 +; LMULMAX1-RV64-NEXT: vse1.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a1, a0, 6 ; LMULMAX1-RV64-NEXT: vse1.v v25, (a1) ; LMULMAX1-RV64-NEXT: addi a1, a0, 4 ; LMULMAX1-RV64-NEXT: vse1.v v25, (a1) -; LMULMAX1-RV64-NEXT: addi a1, a0, 2 -; LMULMAX1-RV64-NEXT: vse1.v v25, (a1) +; LMULMAX1-RV64-NEXT: addi a0, a0, 2 ; LMULMAX1-RV64-NEXT: vse1.v v25, (a0) ; LMULMAX1-RV64-NEXT: ret store <64 x i1> , <64 x i1>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2181,11 +2181,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v25, v0 ; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v8, 16 -; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu -; RV64-NEXT: vsext.vf8 v16, v26 -; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 16 +; RV64-NEXT: vslidedown.vi v28, v8, 16 +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v28 ; RV64-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli a1, 16, 
e8,m1,tu,mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1903,15 +1903,14 @@ ; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 16 -; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu -; RV64-NEXT: vsext.vf8 v16, v26 -; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 16 +; RV64-NEXT: vslidedown.vi v28, v10, 16 +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v8, v28 ; RV64-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu -; RV64-NEXT: vsoxei64.v v26, (a0), v16, v0.t +; RV64-NEXT: vsoxei64.v v26, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -459,10 +459,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_add_v1i64: @@ -1165,10 +1165,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v1i64: @@ -1871,10 +1871,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v1i64: @@ -2577,10 +2577,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_xor_v1i64: @@ -3505,10 +3505,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v1i64: @@ -3705,7 +3705,6 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.v.i 
v25, -1 ; RV32-NEXT: addi a1, zero, 32 @@ -3717,6 +3716,7 @@ ; RV32-NEXT: vsll.vx v26, v26, a1 ; RV32-NEXT: vor.vv v25, v25, v26 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: vredmin.vs v25, v8, v25 ; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 @@ -3751,34 +3751,46 @@ define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind { ; RV32-LABEL: vreduce_smin_v64i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: addi a2, a0, 384 -; RV32-NEXT: vle64.v v16, (a2) +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: vle64.v v24, (a1) ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vle64.v v0, (a1) -; RV32-NEXT: vmin.vv v16, v24, v16 -; RV32-NEXT: vmin.vv v8, v8, v0 -; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsll.vx v25, v25, a1 -; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: vsll.vx v8, v8, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.v.x v26, a0 -; RV32-NEXT: vsll.vx v26, v26, a1 -; RV32-NEXT: vor.vv v25, v25, v26 +; RV32-NEXT: vmv.v.x v9, a0 +; RV32-NEXT: vsll.vx v9, v9, a1 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vmin.vv v24, v0, v24 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmin.vv v16, v0, v16 +; RV32-NEXT: vmin.vv v16, v16, v24 +; RV32-NEXT: vredmin.vs v25, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu ; RV32-NEXT: vsrl.vx v25, v25, a1 ; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v64i64: @@ -4286,10 +4298,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v1i64: @@ -4482,7 +4494,6 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmax.vv v8, v8, v16 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.v.x v25, a0 @@ -4493,6 +4504,7 @@ ; RV32-NEXT: vsrl.vx v26, v26, a1 ; RV32-NEXT: vor.vv v25, v26, v25 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vmax.vv v8, v8, v16 ; RV32-NEXT: vredmax.vs v25, v8, v25 ; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 @@ -4527,33 +4539,45 @@ define i64 @vreduce_smax_v64i64(<64 x i64>* %x) nounwind { ; RV32-LABEL: vreduce_smax_v64i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu ; 
RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: addi a2, a0, 384 -; RV32-NEXT: vle64.v v16, (a2) +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: vle64.v v24, (a1) ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vle64.v v0, (a1) -; RV32-NEXT: vmax.vv v16, v24, v16 -; RV32-NEXT: vmax.vv v8, v8, v0 -; RV32-NEXT: vmax.vv v8, v8, v16 +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsll.vx v25, v25, a1 -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vsll.vx v26, v26, a1 -; RV32-NEXT: vsrl.vx v26, v26, a1 -; RV32-NEXT: vor.vv v25, v26, v25 +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vsll.vx v8, v8, a1 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vsll.vx v9, v9, a1 +; RV32-NEXT: vsrl.vx v9, v9, a1 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: vredmax.vs v25, v8, v25 +; RV32-NEXT: vmax.vv v24, v0, v24 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmax.vv v16, v0, v16 +; RV32-NEXT: vmax.vv v16, v16, v24 +; RV32-NEXT: vredmax.vs v25, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu ; RV32-NEXT: vsrl.vx v25, v25, a1 ; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v64i64: @@ -5040,10 +5064,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v1i64: @@ -5746,10 +5770,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsrl.vx v26, v25, a0 +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsrl.vx v25, v25, a1 -; RV32-NEXT: vmv.x.s a1, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll @@ -199,9 +199,9 @@ define <4 x i64> @stepvector_v4i64() { ; LMULMAX1-LABEL: stepvector_v4i64: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; LMULMAX1-NEXT: vmv.v.i v9, 3 +; LMULMAX1-NEXT: addi a0, zero, 2 ; LMULMAX1-NEXT: vmv.s.x v9, a0 ; LMULMAX1-NEXT: vid.v v8 ; LMULMAX1-NEXT: ret @@ -220,16 +220,16 @@ define <8 x i64> @stepvector_v8i64() { ; LMULMAX1-LABEL: stepvector_v8i64: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v9, 3 ; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vmv.s.x v9, a0 ; LMULMAX1-NEXT: vmv.v.i v10, 5 -; LMULMAX1-NEXT: addi a1, zero, 4 -; LMULMAX1-NEXT: vmv.s.x v10, a1 +; 
LMULMAX1-NEXT: addi a0, zero, 4 +; LMULMAX1-NEXT: vmv.s.x v10, a0 ; LMULMAX1-NEXT: vmv.v.i v11, 7 -; LMULMAX1-NEXT: addi a1, zero, 6 -; LMULMAX1-NEXT: vmv.s.x v11, a1 -; LMULMAX1-NEXT: vmv.v.i v9, 3 -; LMULMAX1-NEXT: vmv.s.x v9, a0 +; LMULMAX1-NEXT: addi a0, zero, 6 +; LMULMAX1-NEXT: vmv.s.x v11, a0 ; LMULMAX1-NEXT: vid.v v8 ; LMULMAX1-NEXT: ret ; @@ -250,28 +250,28 @@ define <16 x i64> @stepvector_v16i64() { ; LMULMAX1-LABEL: stepvector_v16i64: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v9, 3 ; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vmv.s.x v9, a0 ; LMULMAX1-NEXT: vmv.v.i v10, 5 -; LMULMAX1-NEXT: addi a1, zero, 4 -; LMULMAX1-NEXT: vmv.s.x v10, a1 +; LMULMAX1-NEXT: addi a0, zero, 4 +; LMULMAX1-NEXT: vmv.s.x v10, a0 ; LMULMAX1-NEXT: vmv.v.i v11, 7 -; LMULMAX1-NEXT: addi a1, zero, 6 -; LMULMAX1-NEXT: vmv.s.x v11, a1 +; LMULMAX1-NEXT: addi a0, zero, 6 +; LMULMAX1-NEXT: vmv.s.x v11, a0 ; LMULMAX1-NEXT: vmv.v.i v12, 9 -; LMULMAX1-NEXT: addi a1, zero, 8 -; LMULMAX1-NEXT: vmv.s.x v12, a1 +; LMULMAX1-NEXT: addi a0, zero, 8 +; LMULMAX1-NEXT: vmv.s.x v12, a0 ; LMULMAX1-NEXT: vmv.v.i v13, 11 -; LMULMAX1-NEXT: addi a1, zero, 10 -; LMULMAX1-NEXT: vmv.s.x v13, a1 +; LMULMAX1-NEXT: addi a0, zero, 10 +; LMULMAX1-NEXT: vmv.s.x v13, a0 ; LMULMAX1-NEXT: vmv.v.i v14, 13 -; LMULMAX1-NEXT: addi a1, zero, 12 -; LMULMAX1-NEXT: vmv.s.x v14, a1 +; LMULMAX1-NEXT: addi a0, zero, 12 +; LMULMAX1-NEXT: vmv.s.x v14, a0 ; LMULMAX1-NEXT: vmv.v.i v15, 15 -; LMULMAX1-NEXT: addi a1, zero, 14 -; LMULMAX1-NEXT: vmv.s.x v15, a1 -; LMULMAX1-NEXT: vmv.v.i v9, 3 -; LMULMAX1-NEXT: vmv.s.x v9, a0 +; LMULMAX1-NEXT: addi a0, zero, 14 +; LMULMAX1-NEXT: vmv.s.x v15, a0 ; LMULMAX1-NEXT: vid.v v8 ; LMULMAX1-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll --- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll @@ -15,9 +15,8 @@ ; CHECK: bb.0.entry: ; CHECK: liveins: $v8 ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 - ; CHECK: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; CHECK: dead %3:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, $noreg, 64, implicit $vl, implicit $vtype + ; CHECK: dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 64, implicit $vl, implicit $vtype ; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a) ; CHECK: $x10 = COPY [[LD]] ; CHECK: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -576,8 +576,8 @@ ; RV32MV-NEXT: vsub.vv v25, v25, v27 ; RV32MV-NEXT: vmul.vv v25, v25, v26 ; RV32MV-NEXT: vsll.vi v26, v25, 1 -; RV32MV-NEXT: vmv.v.i v27, 10 ; RV32MV-NEXT: addi a1, zero, 9 +; RV32MV-NEXT: vmv.v.i v27, 10 ; RV32MV-NEXT: vmv.s.x v27, a1 ; RV32MV-NEXT: vsll.vv v26, v26, v27 ; RV32MV-NEXT: addi a1, zero, 2047 @@ -637,8 +637,8 @@ ; RV64MV-NEXT: vsub.vv v25, v25, v27 ; RV64MV-NEXT: vmul.vv v25, v25, v26 ; RV64MV-NEXT: vsll.vi v26, v25, 1 -; RV64MV-NEXT: vmv.v.i v27, 10 ; RV64MV-NEXT: addi a1, zero, 9 +; RV64MV-NEXT: vmv.v.i v27, 10 ; RV64MV-NEXT: vmv.s.x v27, a1 ; RV64MV-NEXT: vsll.vv v26, v26, v27 ; RV64MV-NEXT: addi a1, zero, 2047
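
Note on the test updates above (reviewer annotation, not part of the patch):
once a constant AVL that fits in 5 bits survives isel as an immediate, no ADDI
is created to materialize it, so the scalar constant either disappears when it
was otherwise dead (see the frameindex-addr.ll MIR update, where the
ADDI $x0, 1 check line is gone and the store pseudo carries the immediate 1
directly) or is free to be scheduled next to its single remaining use (the
addi/vmv.s.x pairs in the stepvector and urem tests move accordingly). A
minimal reproducer in the style of the fixed-vector tests is sketched below;
the function name is invented, the RUN flags are assumed to match those used
by the fixed-vector tests touched here, and the register checks are left as
patterns since allocation may differ:

; RUN: llc -mtriple=riscv64 -mattr=+experimental-v \
; RUN:   -riscv-v-vector-bits-min=128 < %s | FileCheck %s

; A store of a fixed <4 x i32> has a constant AVL of 4, which fits in the
; 5-bit field, so isel keeps it as an immediate and a single vsetivli is
; emitted with no scalar materialization of the constant.
define void @store_zero_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: store_zero_v4i32:
; CHECK: vsetivli {{[a-z][0-9]+}}, 4, e32,m1,ta,mu
; CHECK-NEXT: vmv.v.i [[REG:v[0-9]+]], 0
; CHECK-NEXT: vse32.v [[REG]], (a0)
  store <4 x i32> zeroinitializer, <4 x i32>* %p
  ret void
}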