diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp @@ -75,11 +75,19 @@ assert(MI.getOpcode() == RISCV::PseudoVSETVLI); Register AVLReg = MI.getOperand(1).getReg(); + Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); // If this VSETVLI isn't changing VL, it is redundant. if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0) return true; + // If the previous VSET{I}VLI's output (which isn't X0) is fed into this + // VSETVLI, this one isn't changing VL so is redundant. + // Only perform this on virtual registers to avoid the complexity of having + // to work out if the physical register was clobbered somewhere in between. + if (AVLReg.isVirtual() && AVLReg == PrevOutVL) + return true; + // If the previous opcode isn't vsetvli we can't do any more comparison. if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI) return false; @@ -94,7 +102,6 @@ // This instruction is setting VL to VLMAX, this is redundant if the // previous VSETVLI was also setting VL to VLMAX. But it is not redundant // if they were setting it to any other value or leaving VL unchanged. - Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); return PrevOutVL != RISCV::X0; } diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir --- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir +++ b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir @@ -1,17 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s -# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first -# keeps the previous value of VL, the second time sets it to VLMAX. We can't -# remove the first since we can't tell if this is a change VL. - --- | ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll' source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll" target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64" - define void @cleanup_vsetvli() #0 { + define void @cleanup_vsetvli0() #0 { + ret void + } + + define void @cleanup_vsetvli1() #0 { ret void } @@ -19,7 +19,10 @@ ... --- -name: cleanup_vsetvli +# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first +# keeps the previous value of VL, the second sets it to VLMAX. We can't remove +# the first since we can't tell if this is a change of VL. +name: cleanup_vsetvli0 alignment: 4 tracksRegLiveness: true registers: @@ -29,7 +32,7 @@ machineFunctionInfo: {} body: | bb.0 (%ir-block.0): - ; CHECK-LABEL: name: cleanup_vsetvli + ; CHECK-LABEL: name: cleanup_vsetvli0 ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype ; CHECK: PseudoRET @@ -38,3 +41,39 @@ PseudoRET ... +--- +# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI. +# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI. +# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a +# physical register which is clobbered by a later instruction. +name: cleanup_vsetvli1 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x3 + ; CHECK-LABEL: name: cleanup_vsetvli1 + ; CHECK: liveins: $x3 + ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype + ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype + ; CHECK: $x1 = COPY $x3 + ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype + ; CHECK: PseudoRET + %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype + dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype + + %2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype + dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype + + $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype + $x1 = COPY $x3 + dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype + PseudoRET + +... diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll @@ -34,12 +34,10 @@ declare @llvm.riscv.vle.nxv4i32.i32(*, i32) ; Check that we remove the redundant vsetvli when followed by another operation -; FIXME: We don't define @redundant_vsetvli(i32 %avl, * %ptr) nounwind { ; CHECK-LABEL: redundant_vsetvli: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu -; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1) @@ -49,13 +47,13 @@ ; Check that we remove the repeated/redundant vsetvli when followed by another ; operation -; FIXME: We don't +; FIXME: We don't catch the second vsetvli because it has a use of its output. +; We could replace it with the output of the first vsetvli. define @repeated_vsetvli(i32 %avl, * %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu -; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll @@ -52,12 +52,10 @@ declare @llvm.riscv.vle.nxv4i32.i64(*, i64) ; Check that we remove the redundant vsetvli when followed by another operation -; FIXME: We don't define @redundant_vsetvli(i64 %avl, * %ptr) nounwind { ; CHECK-LABEL: redundant_vsetvli: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu -; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1) @@ -67,13 +65,13 @@ ; Check that we remove the repeated/redundant vsetvli when followed by another ; operation -; FIXME: We don't +; FIXME: We don't catch the second vsetvli because it has a use of its output. +; We could replace it with the output of the first vsetvli. define @repeated_vsetvli(i64 %avl, * %ptr) nounwind { ; CHECK-LABEL: repeated_vsetvli: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu -; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: ret %vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)