diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -633,6 +633,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; + MachineInstr *VSetMI = nullptr; for (MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. @@ -645,6 +646,7 @@ MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); CurInfo = getInfoForVSETVLI(MI); + VSetMI = &MI; continue; } @@ -678,10 +680,19 @@ // If this instruction isn't compatible with the previous VL/VTYPE // we need to insert a VSETVLI. if (needVSETVLI(NewInfo, CurInfo)) { - insertVSETVLI(MBB, MI, NewInfo, CurInfo); + // If the previous VL/VTYPE was set by a VSETVLI and is still unused, + // merge it with the current VL/VTYPE. + if (VSetMI && + (CurInfo.hasSameAVL(NewInfo) || + (NewInfo.hasAVLReg() && + NewInfo.getAVLReg() == VSetMI->getOperand(0).getReg()))) + VSetMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); + else + insertVSETVLI(MBB, MI, NewInfo, CurInfo); CurInfo = NewInfo; } } + VSetMI = nullptr; } // If this is something updates VL/VTYPE that we don't know about, set @@ -689,6 +700,7 @@ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE)) { CurInfo = VSETVLIInfo::getUnknown(); + VSetMI = nullptr; } } } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \ +; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s + +declare i64 @llvm.riscv.vsetvli(i64, i64, i64) +declare @llvm.riscv.vfadd.nxv1f64.nxv1f64(, , i64) +declare void @foo() + +define 
@test1(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0) + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + ret %1 +} + +define @test2(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0) + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %avl) + ret %1 +} + +define @test3(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, a1, sp +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli s0, a0, e32, m1, ta, mu +; CHECK-NEXT: call foo@plt +; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfadd.vv v8, v26, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 
@llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0) + tail call void @foo() + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + ret %1 +} + +define @test4(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, a1, sp +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: vsetvli a0, a0, e32, m1, ta, mu +; CHECK-NEXT: call foo@plt +; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfadd.vv v8, v26, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0) + tail call void @foo() + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %avl) + ret %1 +} +