diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -999,6 +999,11 @@
 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;
+  // BBLocalInfo tracks the VL/VTYPE state the way BBInfo.Change was calculated
+  // in computeIncomingVLVTYPE. We need this to apply canSkipVSETVLIForLoadStore
+  // the same way computeIncomingVLVTYPE did. We can't include predecessor
+  // information in that decision to avoid disagreeing with the global analysis.
+  VSETVLIInfo BBLocalInfo;
   // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
   MachineInstr *PrevVSETVLIMI = nullptr;
@@ -1014,6 +1019,7 @@
       MI.getOperand(3).setIsDead(false);
       MI.getOperand(4).setIsDead(false);
       CurInfo = getInfoForVSETVLI(MI);
+      BBLocalInfo = getInfoForVSETVLI(MI);
       PrevVSETVLIMI = &MI;
       continue;
     }
@@ -1043,12 +1049,22 @@
         // use the predecessor information.
         assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
                "Expected a valid predecessor state.");
-        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
+        // Don't use predecessor information if there was an earlier instruction
+        // in this block that allowed a vsetvli to be skipped for load/store.
+        if (!(BBLocalInfo.isValid() &&
+              canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
+            needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
             needVSETVLIPHI(NewInfo, MBB)) {
           insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
           CurInfo = NewInfo;
+          BBLocalInfo = NewInfo;
         }
+
+        // We must update BBLocalInfo for every vector instruction.
+        if (!BBLocalInfo.isValid())
+          BBLocalInfo = NewInfo;
       } else {
+        assert(BBLocalInfo.isValid());
         // If this instruction isn't compatible with the previous VL/VTYPE
         // we need to insert a VSETVLI.
         // If this is a unit-stride or strided load/store, we may be able to use
@@ -1084,6 +1100,7 @@
         if (NeedInsertVSETVLI)
           insertVSETVLI(MBB, MI, NewInfo, CurInfo);
         CurInfo = NewInfo;
+        BBLocalInfo = NewInfo;
       }
     }
     PrevVSETVLIMI = nullptr;
@@ -1094,6 +1111,7 @@
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       CurInfo = VSETVLIInfo::getUnknown();
+      BBLocalInfo = VSETVLIInfo::getUnknown();
       PrevVSETVLIMI = nullptr;
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -567,10 +567,6 @@
     ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6, implicit $vl, implicit $vtype
     ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-    ; FIXME: We insert a SEW=32,LMUL=1/2 VSETVLI here but no SEW=64,LMUL=1
-    ; VSETVLI before the VADD above. This misconfigures the VADD in the case that
-    ; the loop takes its backedge.
-    ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 87, implicit-def $vl, implicit-def $vtype, implicit $vl
     ; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5, implicit $vl, implicit $vtype
     ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
     ; CHECK-NEXT: BLTU [[ADDI]], [[COPY1]], %bb.1
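
For readers unfamiliar with the pass, the sketch below illustrates the two-state pattern the patch introduces. It is a simplified stand-in, not the LLVM code: `State`, `VecOp`, `canSkipForLoadStore`, and `emitSettings` are hypothetical analogues of `VSETVLIInfo`, vector MachineInstrs, `canSkipVSETVLIForLoadStore`, and `emitVSETVLIs`, and details such as `needVSETVLIPHI` are omitted. The bug was that `emitVSETVLIs` evaluated the load/store skip heuristic against `CurInfo`, which can absorb predecessor information, while `computeIncomingVLVTYPE` evaluated it against a purely block-local state; keeping a second, block-local `BBLocalInfo` makes both phases see the same inputs.

```cpp
// Minimal sketch of the two-state pattern (hypothetical names, assumptions
// noted above): the skip heuristic must consult only block-local state.
#include <vector>

struct State {
  bool Valid = false;
  int VType = 0;
};

struct VecOp {
  int DemandedVType;
  bool IsLoadStore; // loads/stores may run under a compatible existing VTYPE
};

// Heuristic shared by both phases. It must see the same "previous" state in
// the global analysis and in emission, or the two phases disagree about which
// operations need a fresh configuration.
static bool canSkipForLoadStore(const VecOp &Op, const State &Prev) {
  return Op.IsLoadStore && Prev.Valid && Prev.VType == Op.DemandedVType;
}

// Emission phase: Cur may start from predecessor knowledge, but Local is
// maintained exactly the way the global phase maintained its per-block state,
// and the skip heuristic consults only Local.
static int emitSettings(const std::vector<VecOp> &Block, const State &Pred) {
  int Inserted = 0;
  State Cur = Pred; // may include predecessor information
  State Local;      // block-local mirror of the global analysis
  for (const VecOp &Op : Block) {
    State New{true, Op.DemandedVType};
    bool Skip = canSkipForLoadStore(Op, Local); // never consults Pred
    if (!Skip && !(Cur.Valid && Cur.VType == New.VType)) {
      ++Inserted; // a vsetvli would be inserted before Op here
      Cur = New;
    }
    if (!Skip)
      Local = New; // update in lockstep with the global pass
  }
  return Inserted;
}

int main() {
  // A load/store demanding VTYPE 7 with a predecessor already at VTYPE 7:
  // the skip heuristic cannot fire (Local is still invalid), matching the
  // decision a purely block-local analysis would have made.
  return emitSettings({{7, /*IsLoadStore=*/true}}, {true, 7});
}
```

The actual fix is more involved (`needVSETVLI`/`needVSETVLIPHI` still gate insertion against the predecessor state, and `BBLocalInfo` must be refreshed for every vector instruction, as the diff does), but the invariant is the same: the skip decision never depends on information the global analysis did not have.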