Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -373,24 +373,6 @@ return VSETVLIInfo::getUnknown(); } - // Calculate the VSETVLIInfo visible at the end of the block assuming this - // is the predecessor value, and Other is change for this block. - VSETVLIInfo merge(const VSETVLIInfo &Other) const { - assert(isValid() && "Can only merge with a valid VSETVLInfo"); - - // Nothing changed from the predecessor, keep it. - if (!Other.isValid()) - return *this; - - // If the change is compatible with the input, we won't create a VSETVLI - // and should keep the predecessor. - if (isCompatible(Other, /*Strict*/ true)) - return *this; - - // Otherwise just use whatever is in this block. - return Other; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Support for debugging, callable in GDB: V->dump() LLVM_DUMP_METHOD void dump() const { @@ -946,6 +928,7 @@ bool HadVectorOp = false; BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + BBInfo.Change = BBInfo.Pred; for (const MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. if (isVectorConfigInstr(MI)) { @@ -983,15 +966,11 @@ BBInfo.Change = VSETVLIInfo::getUnknown(); } - // Initial exit state is whatever change we found in the block. 
- BBInfo.Exit = BBInfo.Change; - LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) - << " is " << BBInfo.Exit << "\n"); - return HadVectorOp; } void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; BBInfo.InQueue = false; @@ -1017,7 +996,12 @@ LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) << " changed to " << BBInfo.Pred << "\n"); - VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change); + // Note: It's tempting to cache the state changes here, but due to the + // compatibility checks performed, a block's output state can change based on + // the input state. To cache, we'd have to add logic for finding + // never-compatible state changes. + computeVLVTYPEChanges(MBB); + VSETVLIInfo TmpStatus = BBInfo.Change; // If the new exit value matches the old exit value, we don't need to revisit // any blocks. @@ -1088,7 +1072,6 @@ VSETVLIInfo CurInfo; // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. MachineInstr *PrevVSETVLIMI = nullptr; - for (MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. if (isVectorConfigInstr(MI)) { @@ -1307,8 +1290,15 @@ bool HaveVectorOp = false; // Phase 1 - determine how VL/VTYPE are affected by the each block. - for (const MachineBasicBlock &MBB : MF) + for (const MachineBasicBlock &MBB : MF) { HaveVectorOp |= computeVLVTYPEChanges(MBB); + // Initial exit state is whatever change we found in the block. + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + BBInfo.Exit = BBInfo.Change; + LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) + << " is " << BBInfo.Exit << "\n"); + + } // If we didn't find any instructions that need VSETVLI, we're done. 
if (!HaveVectorOp) { Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -23,15 +23,13 @@ define @test1(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: # %if.else ; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) @@ -56,15 +54,14 @@ define @test2(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v9, v8, v9 -; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: vfmul.vv v8, v9, v8 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: # %if.else ; CHECK-NEXT: vfsub.vv v9, v8, v9 -; CHECK-NEXT: .LBB1_3: # %if.end -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmul.vv v8, v9, v8 ; CHECK-NEXT: ret entry: @@ -183,23 +180,22 @@ ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a2, a1, 1 -; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu -; CHECK-NEXT: bnez a2, .LBB4_2 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: bnez a2, .LBB4_3 ; CHECK-NEXT: # %bb.1: # %if.else ; CHECK-NEXT: vfsub.vv v9, v8, v9 -; CHECK-NEXT: j .LBB4_3 -; CHECK-NEXT: .LBB4_2: # %if.then +; CHECK-NEXT: andi a0, a1, 2 +; CHECK-NEXT: beqz a0, .LBB4_4 +; CHECK-NEXT: .LBB4_2: # %if.then4 +; CHECK-NEXT: vfmul.vv v8, v9, v8 
+; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_3: # %if.then ; CHECK-NEXT: vfadd.vv v9, v8, v9 -; CHECK-NEXT: .LBB4_3: # %if.end -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: andi a0, a1, 2 -; CHECK-NEXT: bnez a0, .LBB4_5 -; CHECK-NEXT: # %bb.4: # %if.else5 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: .LBB4_4: # %if.else5 ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_5: # %if.then4 -; CHECK-NEXT: vfmul.vv v8, v9, v8 -; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) %conv = zext i8 %cond to i32 @@ -242,17 +238,29 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a3, a1, 1 ; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, mu -; CHECK-NEXT: bnez a3, .LBB5_2 +; CHECK-NEXT: bnez a3, .LBB5_3 ; CHECK-NEXT: # %bb.1: # %if.else ; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: j .LBB5_3 -; CHECK-NEXT: .LBB5_2: # %if.then +; CHECK-NEXT: andi a1, a1, 2 +; CHECK-NEXT: beqz a1, .LBB5_4 +; CHECK-NEXT: .LBB5_2: # %if.then4 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) +; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: lui a0, %hi(.LCPI5_1) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_1) +; CHECK-NEXT: vlse64.v v10, (a0), zero +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: lui a0, %hi(scratch) +; CHECK-NEXT: addi a0, a0, %lo(scratch) +; CHECK-NEXT: vse64.v v9, (a0) +; CHECK-NEXT: j .LBB5_5 +; CHECK-NEXT: .LBB5_3: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: .LBB5_3: # %if.end -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: andi a1, a1, 2 -; CHECK-NEXT: bnez a1, .LBB5_5 -; CHECK-NEXT: # %bb.4: # %if.else5 +; CHECK-NEXT: bnez a1, .LBB5_2 +; CHECK-NEXT: .LBB5_4: # %if.else5 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: lui a0, %hi(.LCPI5_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2) @@ -264,20 +272,7 @@ ; CHECK-NEXT: lui a0, %hi(scratch) ; CHECK-NEXT: addi a0, a0, %lo(scratch) ; CHECK-NEXT: 
vse32.v v9, (a0) -; CHECK-NEXT: j .LBB5_6 -; CHECK-NEXT: .LBB5_5: # %if.then4 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: lui a0, %hi(.LCPI5_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_1) -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vfadd.vv v9, v9, v10 -; CHECK-NEXT: lui a0, %hi(scratch) -; CHECK-NEXT: addi a0, a0, %lo(scratch) -; CHECK-NEXT: vse64.v v9, (a0) -; CHECK-NEXT: .LBB5_6: # %if.end10 +; CHECK-NEXT: .LBB5_5: # %if.end10 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vfmul.vv v8, v8, v8 ; CHECK-NEXT: ret @@ -342,7 +337,6 @@ ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; CHECK-NEXT: j .LBB6_3 ; CHECK-NEXT: .LBB6_2: # %if.else ; CHECK-NEXT: csrr a0, vlenb @@ -418,7 +412,6 @@ ; CHECK-NEXT: j .LBB7_3 ; CHECK-NEXT: .LBB7_2: # %if.else ; CHECK-NEXT: vfsub.vv v9, v8, v9 -; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; CHECK-NEXT: .LBB7_3: # %if.end ; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; CHECK-NEXT: vfmul.vv v8, v9, v8 Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -377,7 +377,6 @@ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 ; CHECK-NEXT: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6 /* e64 */, implicit $vtype ; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S_M1_]] - ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: PseudoRET implicit $x10 bb.0.entry: successors: %bb.2(0x30000000), %bb.1(0x50000000) @@ -437,7 +436,7 @@ ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2 ; CHECK-NEXT: PseudoBR %bb.1 @@ -446,14 +445,12 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.if.else: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 @@ -849,7 +846,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %mask:vr = PseudoVMANDN_MM_MF8 %t6, %t3, -1, 0 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: %t2:gpr = COPY $x0 - ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: BEQ %a, %t2, %bb.3 ; CHECK-NEXT: PseudoBR %bb.2 ; CHECK-NEXT: {{ $}} @@ -857,6 +853,7 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, 
implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: early-clobber %t0:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, killed %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1, implicit $vl, implicit $vtype ; CHECK-NEXT: %ldval:vr = COPY %t0 ; CHECK-NEXT: PseudoBR %bb.3 @@ -864,6 +861,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: %stval:vr = PHI %t4, %bb.1, %ldval, %bb.2 ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: PseudoVSOXEI64_V_M1_MF8_MASK killed %stval, killed %b, %idxs, $v0, -1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET bb.0: