diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -733,6 +733,17 @@
   return NewInfo;
 }
 
+// Return a VSETVLIInfo for the given PseudoReadVL. Although PseudoReadVL does
+// not change VL/VTYPE, its operands carry the values of VL and VTYPE at its
+// location.
+static VSETVLIInfo getInfoForReadVL(const MachineInstr &ReadVL) {
+  assert(ReadVL.getOpcode() == RISCV::PseudoReadVL);
+  VSETVLIInfo NewInfo;
+  NewInfo.setAVLReg(ReadVL.getOperand(0).getReg());
+  NewInfo.setVTYPE(ReadVL.getOperand(1).getImm());
+  return NewInfo;
+}
+
 bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
                                      const VSETVLIInfo &CurInfo) {
   if (CurInfo.isCompatible(Require, /*Strict*/ false))
@@ -965,6 +976,11 @@
       continue;
     }
 
+    if (MI.getOpcode() == RISCV::PseudoReadVL) {
+      BBInfo.Change = getInfoForReadVL(MI);
+      continue;
+    }
+
     uint64_t TSFlags = MI.getDesc().TSFlags;
     if (RISCVII::hasSEWOp(TSFlags)) {
       HadVectorOp = true;
@@ -1076,14 +1092,18 @@
       !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
     return true;
 
-  // We need the PHI input to the be the output of a VSET(I)VLI.
+  // We need the PHI input to be the output of a VSET(I)VLI or
+  // PseudoReadVL.
   MachineInstr *DefMI = MRI->getVRegDef(InReg);
-  if (!DefMI || !isVectorConfigInstr(*DefMI))
+  if (!DefMI || (!isVectorConfigInstr(*DefMI) &&
+                 DefMI->getOpcode() != RISCV::PseudoReadVL))
     return true;
 
-  // We found a VSET(I)VLI make sure it matches the output of the
-  // predecessor block.
-  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+  // We found a VSET(I)VLI or PseudoReadVL; make sure it matches the output
+  // of the predecessor block.
+  VSETVLIInfo DefInfo = (DefMI->getOpcode() == RISCV::PseudoReadVL)
+                            ? getInfoForReadVL(*DefMI)
+                            : getInfoForVSETVLI(*DefMI);
   if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
       !DefInfo.hasSameVTYPE(PBBInfo.Exit))
     return true;
@@ -1113,6 +1133,11 @@
       continue;
     }
 
+    if (MI.getOpcode() == RISCV::PseudoReadVL) {
+      CurInfo = getInfoForReadVL(MI);
+      continue;
+    }
+
     uint64_t TSFlags = MI.getDesc().TSFlags;
     if (RISCVII::hasSEWOp(TSFlags)) {
       VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
@@ -1271,13 +1296,19 @@
   // with X0 to reduce register pressure. This is really a generic
   // optimization which can be applied to any dead def (TODO: generalize).
   for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
+    for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
+      MachineInstr &MI = *I++;
       if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETIVLI) {
         Register VRegDef = MI.getOperand(0).getReg();
         if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
           MI.getOperand(0).setReg(RISCV::X0);
       }
+
+      // PseudoReadVL MIs may be dead after emitVSETVLIs; erase them.
+      if (MI.getOpcode() == RISCV::PseudoReadVL &&
+          MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
+        MI.eraseFromParent();
     }
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-modify-vl.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-modify-vl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-modify-vl.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
+declare { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>* nocapture, i64)
+declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8.i64(<vscale x 32 x i8>, i8, i64)
+declare <vscale x 32 x i8> @llvm.riscv.vadd.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, i64)
+declare <vscale x 16 x i16> @llvm.riscv.vadd.nxv16i16.i16.i64(<vscale x 16 x i16>, <vscale x 16 x i16>, i16, i64)
+
+define <vscale x 32 x i1> @seq1(i1 zeroext %cond, i8* %str, i64 %n, i8 %x) {
+; CHECK-LABEL: seq1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vle8ff.v v8, (a1)
+; CHECK-NEXT:    vadd.vx v8, v8, a3
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
+  %1 = bitcast i8* %str to <vscale x 32 x i8>*
+  %2 = tail call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* %1, i64 %0)
+  %3 = extractvalue { <vscale x 32 x i8>, i64 } %2, 0
+  %4 = extractvalue { <vscale x 32 x i8>, i64 } %2, 1
+  %5 = tail call <vscale x 32 x i8> @llvm.riscv.vadd.nxv32i8.i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8> %3, i8 %x, i64 %4)
+  %6 = tail call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8.i64(<vscale x 32 x i8> %5, i8 0, i64 %4)
+  ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i8> @cross_bb(i1 zeroext %cond, i8 zeroext %x, <vscale x 32 x i8> %vv, i8* %str, i64 %n) {
+; CHECK-LABEL: cross_bb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, mu
+; CHECK-NEXT:    beqz a0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vle8ff.v v12, (a2)
+; CHECK-NEXT:    j .LBB1_3
+; CHECK-NEXT:  .LBB1_2: # %if.else
+; CHECK-NEXT:    vsetvli a0, a3, e8, m4, ta, mu
+; CHECK-NEXT:  .LBB1_3: # %if.end
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
+  br i1 %cond, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %1 = bitcast i8* %str to <vscale x 32 x i8>*
+  %2 = tail call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* %1, i64 %0)
+  %3 = extractvalue { <vscale x 32 x i8>, i64 } %2, 1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %new_vl.0 = phi i64 [ %3, %if.then ], [ %4, %if.else ]
+  %5 = tail call <vscale x 32 x i8> @llvm.riscv.vadd.nxv32i8.i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8> %vv, i8 %x, i64 %new_vl.0)
+  %6 = tail call <vscale x 32 x i8> @llvm.riscv.vadd.nxv32i8.i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8> %5, i8 %x, i64 %new_vl.0)
+  ret <vscale x 32 x i8> %6
+}
+
+; Test that we do not eliminate a useful vsetvli.
+define <vscale x 16 x i16> @no_work(i1 zeroext %cond, i8* %str, i64 %n, <vscale x 16 x i16> %v, i16 %x) {
+; CHECK-LABEL: no_work:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vle8ff.v v12, (a1)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vadd.vx v8, v8, a3
+; CHECK-NEXT:    ret
entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
+  %1 = bitcast i8* %str to <vscale x 32 x i8>*
+  %2 = tail call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* %1, i64 %0)
+  %3 = extractvalue { <vscale x 32 x i8>, i64 } %2, 1
+  %4 = tail call <vscale x 16 x i16> @llvm.riscv.vadd.nxv16i16.i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16> %v, i16 %x, i64 %3)
+  ret <vscale x 16 x i16> %4
+}