diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -344,6 +344,22 @@
     Res.MaskPolicy = false;
   }
 
+  // VL=VLMAX means there are no tail elements, so the tail policy is not
+  // demanded.
+  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
+    RISCVII::VLMUL VLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+    // Fractional LMULs always have tail elements and thus demand tail policy.
+    if (VLMul < RISCVII::LMUL_RESERVED) {
+      const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+      if (VLOp.isImm()) {
+        int64_t Imm = VLOp.getImm();
+        if (Imm == RISCV::VLMaxSentinel)
+          Res.TailPolicy = false;
+      } else if (VLOp.getReg() == RISCV::X0) {
+        Res.TailPolicy = false;
+      }
+    }
+  }
+
   // If this is a mask reg operation, it only cares about VLMAX.
   // TODO: Possible extensions to this logic
   // * Probably ok if available VLMax is larger than demanded
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -385,7 +385,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -579,6 +579,32 @@
   ret %2
 }
 
+define <vscale x 1 x i64> @test21(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: test21:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v8, v0.t
+; CHECK-NEXT:    vadd.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %x = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
+    <vscale x 1 x i64> %a,
+    <vscale x 1 x i64> %a,
+    <vscale x 1 x i64> %a,
+    <vscale x 1 x i1> %mask,
+    i64 -1,
+    i64 0)
+  %y = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
+    <vscale x 1 x i64> %b,
+    <vscale x 1 x i64> %b,
+    <vscale x 1 x i64> %x,
+    <vscale x 1 x i1> %mask,
+    i64 -1,
+    i64 1)
+  ret <vscale x 1 x i64> %y
+}
+
 ; This used to fail the machine verifier due to the vsetvli being removed
 ; while the add was still using it.
 define i64 @bad_removal(<2 x i64> %arg) {
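
Note: the case analysis the first hunk adds can be summarized as below. This is a
minimal, self-contained sketch, not LLVM code: the enum and sentinel mirror
RISCVII::VLMUL and RISCV::VLMaxSentinel, the VLOperand struct is a stand-in for the
real MachineOperand, and it assumes the instruction has a VL operand at all (the
hasVLOp check in the patch).

#include <cassert>
#include <cstdint>
#include <optional>

// Mirrors the layout of RISCVII::VLMUL: integral LMULs (m1..m8) sort below
// Reserved, fractional LMULs (mf8..mf2) above it, as in the vtype encoding.
enum class VLMul { M1, M2, M4, M8, Reserved, MF8, MF4, MF2 };

// Mirrors RISCV::VLMaxSentinel, the immediate encoding of "VL = VLMAX".
constexpr int64_t VLMaxSentinel = -1;

// Stand-in for the VL machine operand: either an immediate or a register
// (modeled here as just "is it x0?").
struct VLOperand {
  std::optional<int64_t> Imm;
  bool IsX0Reg = false;
};

// Returns whether the instruction still demands the tail policy, following
// the same case analysis as the patch.
bool demandsTailPolicy(VLMul LMul, const VLOperand &VL) {
  // Fractional LMULs always have tail elements, even at VLMAX.
  if (LMul >= VLMul::Reserved)
    return true;
  // Immediate AVL: only the VLMAX sentinel drops the demand.
  if (VL.Imm)
    return *VL.Imm != VLMaxSentinel;
  // Register AVL: x0 means VLMAX; any other register may give a shorter VL.
  return !VL.IsX0Reg;
}

int main() {
  assert(!demandsTailPolicy(VLMul::M1, {VLMaxSentinel, false})); // imm VLMAX
  assert(!demandsTailPolicy(VLMul::M2, {std::nullopt, true}));   // AVL = x0
  assert(demandsTailPolicy(VLMul::MF2, {VLMaxSentinel, false})); // fractional
  assert(demandsTailPolicy(VLMul::M1, {int64_t{4}, false}));     // AVL = 4
  return 0;
}

The `VLMul < RISCVII::LMUL_RESERVED` comparison in the patch works because the
integral LMULs are encoded below LMUL_RESERVED and the fractional ones above it;
with a fractional LMUL the register always holds elements past VLMAX, so the tail
policy stays demanded even when VL is VLMAX. That is also why the redundant
"vsetvli zero, zero, e32, m1, tu, ma" disappears in the vmerge test: the vmerge
runs at VL=VLMAX, so the preceding tail-agnostic vsetvli already satisfies it.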