Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -373,24 +373,6 @@
     return VSETVLIInfo::getUnknown();
   }

-  // Calculate the VSETVLIInfo visible at the end of the block assuming this
-  // is the predecessor value, and Other is change for this block.
-  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
-    assert(isValid() && "Can only merge with a valid VSETVLInfo");
-
-    // Nothing changed from the predecessor, keep it.
-    if (!Other.isValid())
-      return *this;
-
-    // If the change is compatible with the input, we won't create a VSETVLI
-    // and should keep the predecessor.
-    if (isCompatible(Other, /*Strict*/ true))
-      return *this;
-
-    // Otherwise just use whatever is in this block.
-    return Other;
-  }
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Support for debugging, callable in GDB: V->dump()
   LLVM_DUMP_METHOD void dump() const {
@@ -943,6 +925,7 @@
   bool HadVectorOp = false;

   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+  BBInfo.Change = BBInfo.Pred;
   for (const MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
@@ -957,19 +940,18 @@

       VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);

-      if (!BBInfo.Change.isValid()) {
+      // If this instruction isn't compatible with the previous VL/VTYPE
+      // we need to insert a VSETVLI.
+      // If this is a unit-stride or strided load/store, we may be able to use
+      // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+      // NOTE: We only do this if the vtype we're comparing against was
+      // created in this block. We need the first and third phase to treat
+      // the store the same way.
+      if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
+          needVSETVLI(NewInfo, BBInfo.Change)) {
+        LLVM_DEBUG(dbgs() << "Updated to " << NewInfo << " due to " << MI
+                          << " (phase1/2)\n");
         BBInfo.Change = NewInfo;
-      } else {
-        // If this instruction isn't compatible with the previous VL/VTYPE
-        // we need to insert a VSETVLI.
-        // If this is a unit-stride or strided load/store, we may be able to use
-        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
-        // NOTE: We only do this if the vtype we're comparing against was
-        // created in this block. We need the first and third phase to treat
-        // the store the same way.
-        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
-            needVSETVLI(NewInfo, BBInfo.Change))
-          BBInfo.Change = NewInfo;
       }
     }

@@ -981,13 +963,11 @@
     }
   }

-  // Initial exit state is whatever change we found in the block.
-  BBInfo.Exit = BBInfo.Change;
-
   return HadVectorOp;
 }

 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
+
   BlockData &BBInfo = BlockInfo[MBB.getNumber()];

   BBInfo.InQueue = false;
@@ -1000,10 +980,7 @@
     for (MachineBasicBlock *P : MBB.predecessors())
       InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
   }
-
-  // If we don't have any valid predecessor value, wait until we do.
-  if (!InInfo.isValid())
-    return;
+  assert(InInfo.isValid());

   // If no change, no need to rerun block
   if (InInfo == BBInfo.Pred)
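
The hunk above is sound because each block's entry state is the meet (intersect) of its predecessors' exit states, and phase 1 now seeds every block's Pred with Unknown, so a valid value always exists by the time phase 2 runs; hence the assert replacing the old early return. Below is a minimal standalone sketch of that meet pattern. The State type and its fields are illustrative stand-ins for the pass's VSETVLIInfo, not its real representation.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the pass's VSETVLIInfo lattice: Uninitialized
// (top), an exact VL/VTYPE configuration, or Unknown (bottom).
struct State {
  enum Kind { Uninitialized, Exact, Unknown } K = Uninitialized;
  uint64_t VTypeBits = 0; // stand-in for the real SEW/LMUL/policy fields

  bool isValid() const { return K != Uninitialized; }

  // Meet operator: agreeing exact states stay exact; any disagreement or an
  // Unknown input collapses to Unknown, mirroring VSETVLIInfo::intersect.
  State intersect(const State &Other) const {
    if (!Other.isValid())
      return *this;
    if (!isValid())
      return Other;
    if (K == Exact && Other.K == Exact && VTypeBits == Other.VTypeBits)
      return *this;
    return State{Unknown, 0};
  }
};

// Entry state = meet over all predecessor exit states, as in
// computeIncomingVLVTYPE above. With every Pred seeded to Unknown in
// phase 1, the result is always valid, which is what the patch asserts.
State computeEntry(const std::vector<State> &PredExits) {
  if (PredExits.empty())
    return State{State::Unknown, 0}; // e.g. the function's entry block
  State In; // starts at top (Uninitialized)
  for (const State &E : PredExits)
    In = In.intersect(E);
  return In;
}
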
@@ -1013,7 +990,12 @@
   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                     << " changed to " << BBInfo.Pred << "\n");

-  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+  // Note: It's tempting to cache the state changes here, but due to the
+  // compatibility checks performed, a block's output state can change based
+  // on the input state. To cache, we'd have to add logic for finding
+  // never-compatible state changes.
+  computeVLVTYPEChanges(MBB);
+  VSETVLIInfo TmpStatus = BBInfo.Change;

 // If the new exit value matches the old exit value, we don't need to revisit
 // any blocks.
@@ -1081,10 +1063,12 @@
 }

 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
-  VSETVLIInfo CurInfo;
+  assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
+         "Expected a valid predecessor state.");
+  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
   // Only set if the current VSETVLIInfo is from an explicit VSET(I)VLI.
   MachineInstr *PrevVSETVLIMI = nullptr;
-
+  bool IsFirstStateChange = true;
   for (MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
@@ -1096,6 +1080,7 @@
         MI.getOperand(4).setIsDead(false);
       CurInfo = getInfoForVSETVLI(MI);
       PrevVSETVLIMI = &MI;
+      IsFirstStateChange = false;
       continue;
     }

@@ -1119,60 +1104,59 @@
       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                               /*isImp*/ true));

-      if (!CurInfo.isValid()) {
-        // We haven't found any vector instructions or VL/VTYPE changes yet,
-        // use the predecessor information.
-        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
-               "Expected a valid predecessor state.");
-        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
-          // If this is the first implicit state change, and the state change
-          // requested can be proven to produce the same register contents, we
-          // can skip emitting the actual state change and continue as if we
-          // had since we know the GPR result of the implicit state change
-          // wouldn't be used and VL/VTYPE registers are correct. Note that
-          // we *do* need to model the state as if it changed as while the
-          // register contents are unchanged, the abstract model can change.
-          if (needVSETVLIPHI(NewInfo, MBB))
-            insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
+      // If this instruction isn't compatible with the previous VL/VTYPE
+      // we need to insert a VSETVLI.
+      // If this is a unit-stride or strided load/store, we may be able to use
+      // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+      // NOTE: We can't use predecessor information for the store. We must
+      // treat it the same as the first phase so that we produce the correct
+      // vl/vtype for successor blocks.
+      if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
+          needVSETVLI(NewInfo, CurInfo)) {
+
+        // If this is the first implicit state change, and the state change
+        // requested can be proven to produce the same register contents, we
+        // can skip emitting the actual state change and continue as if we
+        // had since we know the GPR result of the implicit state change
+        // wouldn't be used and VL/VTYPE registers are correct. Note that
+        // we *do* need to model the state as if it changed as while the
+        // register contents are unchanged, the abstract model can change.
+        if (IsFirstStateChange && !needVSETVLIPHI(NewInfo, MBB)) {
           CurInfo = NewInfo;
+          PrevVSETVLIMI = nullptr;
+          IsFirstStateChange = false;
+          continue;
         }
-      } else {
-        // If this instruction isn't compatible with the previous VL/VTYPE
-        // we need to insert a VSETVLI.
-        // If this is a unit-stride or strided load/store, we may be able to use
-        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
-        // NOTE: We can't use predecessor information for the store. We must
-        // treat it the same as the first phase so that we produce the correct
-        // vl/vtype for succesor blocks.
-        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
-            needVSETVLI(NewInfo, CurInfo)) {
-          // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
-          // with current VL/VTYPE.
-          bool NeedInsertVSETVLI = true;
-          if (PrevVSETVLIMI) {
-            bool HasSameAVL =
-                CurInfo.hasSameAVL(NewInfo) ||
-                (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
-                 NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
-            // If these two VSETVLI have the same AVL and the same VLMAX,
-            // we could merge these two VSETVLI.
-            if (HasSameAVL &&
-                CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
-            if (isScalarMoveInstr(MI) &&
-                ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
-                 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
-                NewInfo.hasSameVLMAX(CurInfo)) {
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
+
+        // If the previous VL/VTYPE was set by a VSETVLI whose result is
+        // unused, merge the new state into it instead of inserting another
+        // VSETVLI.
+        bool NeedInsertVSETVLI = true;
+        if (PrevVSETVLIMI) {
+          bool HasSameAVL =
+              CurInfo.hasSameAVL(NewInfo) ||
+              (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
+               NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
+          // If these two VSETVLI have the same AVL and the same VLMAX,
+          // we could merge these two VSETVLI.
+          if (HasSameAVL &&
+              CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
+            PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+            NeedInsertVSETVLI = false;
+          }
+          if (isScalarMoveInstr(MI) &&
+              ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
+               (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
+              NewInfo.hasSameVLMAX(CurInfo)) {
+            PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+            NeedInsertVSETVLI = false;
           }
-          if (NeedInsertVSETVLI)
-            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
-          CurInfo = NewInfo;
         }
+        if (NeedInsertVSETVLI)
+          insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+        LLVM_DEBUG(dbgs() << "Updated to " << NewInfo << " due to " << MI
+                          << " (phase3)\n");
+        CurInfo = NewInfo;
+        IsFirstStateChange = false;
       }
       PrevVSETVLIMI = nullptr;
     }
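
The vsetvli-merging logic retained above hinges on VLMAX: if the prior explicit vsetvli and the new requirement share an AVL and the SEW/LMUL ratio, then VLMAX (and hence the value written to vl) is unchanged, so rewriting the prior instruction's vtype immediate suffices. Below is a hedged sketch of that ratio check; the scale-by-8 representation is an assumption used here to keep fractional LMULs integral, not necessarily the pass's exact encoding.

#include <cassert>

// SEW/LMUL ratio, scaled by 8 so fractional LMULs (mf8..mf2) stay integral.
// LMul is the group multiplier (1, 2, 4, 8); Fractional selects 1/LMul.
unsigned getSEWLMULRatio(unsigned SEW, bool Fractional, unsigned LMul) {
  assert(SEW >= 8 && SEW <= 64 && LMul >= 1 && LMul <= 8);
  unsigned LMulX8 = Fractional ? 8 / LMul : LMul * 8;
  return (SEW * 8) / LMulX8;
}

int main() {
  // Same ratio => same VLMAX: e32/m1 and e16/mf2 (the pair toggled in the
  // .ll test below), so rewriting the earlier vsetvli's vtype immediate
  // leaves vl intact.
  assert(getSEWLMULRatio(32, /*Fractional=*/false, 1) ==
         getSEWLMULRatio(16, /*Fractional=*/true, 2));
  // Different ratio => VLMAX changes, and a real vsetvli is required.
  assert(getSEWLMULRatio(32, false, 1) != getSEWLMULRatio(64, false, 1));
  return 0;
}
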
@@ -1182,6 +1166,7 @@
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       CurInfo = VSETVLIInfo::getUnknown();
+      IsFirstStateChange = false;
       PrevVSETVLIMI = nullptr;
     }

@@ -1192,6 +1177,7 @@
     if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
         CurInfo != ExitInfo) {
       insertVSETVLI(MBB, MI, ExitInfo, CurInfo);
+      IsFirstStateChange = false;
       CurInfo = ExitInfo;
     }
   }
@@ -1227,8 +1213,13 @@
   bool HaveVectorOp = false;

   // Phase 1 - determine how VL/VTYPE are affected by each block.
-  for (const MachineBasicBlock &MBB : MF)
+  for (const MachineBasicBlock &MBB : MF) {
+    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+    BBInfo.Pred = VSETVLIInfo::getUnknown();
     HaveVectorOp |= computeVLVTYPEChanges(MBB);
+    // Initial exit state is whatever change we found in the block.
+    BBInfo.Exit = BBInfo.Change;
+  }

   // If we didn't find any instructions that need VSETVLI, we're done.
   if (!HaveVectorOp) {
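
Net effect on the pass's structure: phase 1 seeds every block's Pred with Unknown and computes the local transfer, phase 2 iterates entry states to a fixed point (re-running the transfer instead of caching, per the note in the patch), and phase 3 emits vsetvlis starting from the now-always-valid Pred. The following schematic driver reuses the illustrative State type from the first sketch; BlockData's shape and the transfer callback are assumptions for illustration, not the pass's exact interfaces.

#include <deque>
#include <vector>

// Reuses the illustrative State from the first sketch above.
struct BlockData {
  State Pred, Change, Exit;
  bool InQueue = false;
};

// Schematic of phases 1 and 2 after this patch: seed every Pred with
// Unknown, take the local transfer as the initial exit state, then iterate
// until no exit state changes. transfer() stands in for
// computeVLVTYPEChanges and is recomputed on every visit.
void solve(std::vector<BlockData> &BI,
           const std::vector<std::vector<int>> &Preds,
           const std::vector<std::vector<int>> &Succs,
           State (*transfer)(int Block, const State &Entry)) {
  std::deque<int> Work;
  for (int B = 0, E = (int)BI.size(); B != E; ++B) {
    BI[B].Pred = State{State::Unknown, 0};
    BI[B].Change = transfer(B, BI[B].Pred);
    BI[B].Exit = BI[B].Change; // initial exit = local change
    BI[B].InQueue = true;
    Work.push_back(B);
  }
  auto Same = [](const State &A, const State &B) {
    return A.K == B.K && A.VTypeBits == B.VTypeBits;
  };
  while (!Work.empty()) {
    int B = Work.front();
    Work.pop_front();
    BI[B].InQueue = false;
    State In = State{State::Unknown, 0}; // no predecessors => Unknown
    if (!Preds[B].empty()) {
      In = State{}; // top; first intersect adopts the predecessor's exit
      for (int P : Preds[B])
        In = In.intersect(BI[P].Exit);
    }
    if (Same(In, BI[B].Pred))
      continue; // entry unchanged, nothing to redo
    BI[B].Pred = In;
    State Out = transfer(B, In); // recompute: output depends on input
    if (Same(Out, BI[B].Exit))
      continue; // exit unchanged, successors unaffected
    BI[B].Exit = Out;
    for (int S : Succs[B])
      if (!BI[S].InQueue) {
        BI[S].InQueue = true;
        Work.push_back(S);
      }
  }
}
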
Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -505,8 +505,8 @@
 ; CHECK-NEXT:    andi a0, a3, 1
 ; CHECK-NEXT:    beqz a0, .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vwcvt.x.x.v v8, v10
 ; CHECK-NEXT:  .LBB9_2: # %if.end
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
@@ -544,8 +544,8 @@
 ; CHECK-NEXT:    andi a0, a4, 1
 ; CHECK-NEXT:    beqz a0, .LBB10_2
 ; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vwadd.wv v9, v9, v10
 ; CHECK-NEXT:  .LBB10_2: # %if.end
 ; CHECK-NEXT:    andi a0, a5, 1
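
The regenerated checks above show the payoff: the vsetvli sinks past the vle16.v because a unit-stride load's data EEW is encoded in the opcode, so the load only needs the incoming vtype to imply a legal EMUL = (EEW/SEW) * LMUL with the same SEW/LMUL ratio; only the following widening op needs the toggle. A worked check of that relationship follows; computeEMULx8 is an illustrative helper, while the pass performs this reasoning inside canSkipVSETVLIForLoadStore.

#include <cassert>
#include <optional>

// For unit-stride and strided memory ops the data EEW comes from the opcode,
// so executing under a "wrong" vtype is fine as long as the implied
// EMUL = (EEW/SEW) * LMUL is legal and the SEW/LMUL ratio (VLMAX) matches.
// LMUL is held as eighths so fractional values (mf8..mf2) stay integral.
std::optional<unsigned> computeEMULx8(unsigned EEW, unsigned SEW,
                                      unsigned LMulX8) {
  if ((EEW * LMulX8) % SEW != 0)
    return std::nullopt;
  unsigned EMulX8 = EEW * LMulX8 / SEW;
  if (EMulX8 < 1 || EMulX8 > 64) // legal groupings: mf8 (1) .. m8 (64)
    return std::nullopt;
  return EMulX8;
}

int main() {
  // From the .ll hunks above: a vle16.v (EEW=16) reached under an e32/m1
  // vtype implies EMUL = (16/32)*1 = 1/2, the same mf2 grouping the removed
  // vsetvli used to establish, so the load can run before the toggle.
  assert(computeEMULx8(/*EEW=*/16, /*SEW=*/32, /*LMulX8=*/8) == 4u); // mf2
  return 0;
}
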
Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -600,10 +600,8 @@
   ; CHECK-NEXT:   [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
   ; CHECK-NEXT:   [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   BLTU [[ADDI]], [[COPY1]], %bb.1
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -744,9 +742,6 @@
 ...
 ---
-# FIXME: This test shows incorrect VSETVLI insertion. The VLUXEI64 needs
-# configuration for SEW=8 but it instead inherits a SEW=64 from the entry
-# block.
 name: vsetvli_vluxei64_regression
 tracksRegLiveness: true
 body: |
@@ -779,6 +774,7 @@
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $v0 = COPY %mask
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   early-clobber %t0:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, killed %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   %ldval:vr = COPY %t0
   ; CHECK-NEXT:   PseudoBR %bb.3
@@ -786,6 +782,7 @@
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   %stval:vr = PHI %t4, %bb.1, %ldval, %bb.2
   ; CHECK-NEXT:   $v0 = COPY %mask
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSOXEI64_V_M1_MF8_MASK killed %stval, killed %b, %idxs, $v0, -1, 3 /* e8 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   PseudoRET
   bb.0:
@@ -865,8 +862,8 @@
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[ADD1:%[0-9]+]]:gpr = ADD %src, [[PHI]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 killed [[ADD1]], -1, 3 /* e8 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   [[PseudoVADD_VI_MF8_:%[0-9]+]]:vrnov0 = PseudoVADD_VI_MF8 [[PseudoVLE8_V_MF8_]], 4, -1, 3 /* e8 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADD2:%[0-9]+]]:gpr = ADD %dst, [[PHI]]
   ; CHECK-NEXT:   PseudoVSE8_V_MF8 killed [[PseudoVADD_VI_MF8_]], killed [[ADD2]], -1, 3 /* e8 */, implicit $vl, implicit $vtype
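
Finally, the .mir changes resolve the deleted FIXME: the masked vluxei64/vsoxei64 now get an explicit e8/mf8 configuration instead of silently inheriting SEW=64, because the EMUL-based skip is only sound for unit-stride and strided accesses. An indexed access takes its data SEW from vtype (only the index EEW is in the opcode), so it must observe the exact state. A hedged sketch of that gating follows; the enum classification is illustrative, and the pass recognizes the qualifying opcodes internally.

// Which memory ops may skip a vtype toggle. Unit-stride and strided
// accesses encode their data EEW in the opcode, so only EMUL legality and
// the SEW/LMUL ratio matter. Indexed accesses (e.g. PseudoVLUXEI64_*)
// encode the *index* EEW; their data SEW still comes from vtype, so they
// need the exact configuration, which is the bug the deleted FIXME noted.
enum class MemAccessKind { UnitStride, Strided, Indexed, Other };

bool mayUseEMULToSkipVSETVLI(MemAccessKind Kind) {
  switch (Kind) {
  case MemAccessKind::UnitStride:
  case MemAccessKind::Strided:
    return true; // candidate for the EMUL=(EEW/SEW)*LMUL reasoning
  case MemAccessKind::Indexed:
  case MemAccessKind::Other:
    return false; // must observe the exact VL/VTYPE state
  }
  return false;
}
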