diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1122,9 +1122,9 @@
       if (!CurInfo.isValid()) {
         // We haven't found any vector instructions or VL/VTYPE changes yet,
         // use the predecessor information.
-        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
-               "Expected a valid predecessor state.");
-        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
+        CurInfo = BlockInfo[MBB.getNumber()].Pred;
+        assert(CurInfo.isValid() && "Expected a valid predecessor state.");
+        if (needVSETVLI(NewInfo, CurInfo)) {
           // If this is the first implicit state change, and the state change
           // requested can be proven to produce the same register contents, we
           // can skip emitting the actual state change and continue as if we
@@ -1133,7 +1133,7 @@
           // we *do* need to model the state as if it changed as while the
           // register contents are unchanged, the abstract model can change.
           if (needVSETVLIPHI(NewInfo, MBB))
-            insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
+            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
           CurInfo = NewInfo;
         }
       } else {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -23,13 +23,15 @@
 define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    beqz a1, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
@@ -54,14 +56,15 @@
 define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    beqz a1, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v9, v8, v9
-; CHECK-NEXT:    vfmul.vv v8, v9, v8
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    j .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
+; CHECK-NEXT:  .LBB1_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v9, v8
 ; CHECK-NEXT:    ret
 entry:
@@ -180,22 +183,23 @@
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi a2, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    bnez a2, .LBB4_3
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT:    bnez a2, .LBB4_2
 ; CHECK-NEXT:  # %bb.1: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
-; CHECK-NEXT:    andi a0, a1, 2
-; CHECK-NEXT:    beqz a0, .LBB4_4
-; CHECK-NEXT:  .LBB4_2: # %if.then4
-; CHECK-NEXT:    vfmul.vv v8, v9, v8
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_3: # %if.then
+; CHECK-NEXT:    j .LBB4_3
+; CHECK-NEXT:  .LBB4_2: # %if.then
 ; CHECK-NEXT:    vfadd.vv v9, v8, v9
+; CHECK-NEXT:  .LBB4_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    andi a0, a1, 2
-; CHECK-NEXT:    bnez a0, .LBB4_2
-; CHECK-NEXT:  .LBB4_4: # %if.else5
+; CHECK-NEXT:    bnez a0, .LBB4_5
+; CHECK-NEXT:  # %bb.4: # %if.else5
 ; CHECK-NEXT:    vfmul.vv v8, v8, v9
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_5: # %if.then4
+; CHECK-NEXT:    vfmul.vv v8, v9, v8
+; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
   %conv = zext i8 %cond to i32
@@ -238,29 +242,17 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi a3, a1, 1
 ; CHECK-NEXT:    vsetvli a2, a0, e64, m1, ta, mu
-; CHECK-NEXT:    bnez a3, .LBB5_3
+; CHECK-NEXT:    bnez a3, .LBB5_2
 ; CHECK-NEXT:  # %bb.1: # %if.else
 ; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    andi a1, a1, 2
-; CHECK-NEXT:    beqz a1, .LBB5_4
-; CHECK-NEXT:  .LBB5_2: # %if.then4
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    lui a0, %hi(scratch)
-; CHECK-NEXT:    addi a0, a0, %lo(scratch)
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    j .LBB5_5
-; CHECK-NEXT:  .LBB5_3: # %if.then
+; CHECK-NEXT:    j .LBB5_3
+; CHECK-NEXT:  .LBB5_2: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:  .LBB5_3: # %if.end
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    andi a1, a1, 2
-; CHECK-NEXT:    bnez a1, .LBB5_2
-; CHECK-NEXT:  .LBB5_4: # %if.else5
+; CHECK-NEXT:    bnez a1, .LBB5_5
+; CHECK-NEXT:  # %bb.4: # %if.else5
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_2)
@@ -272,7 +264,20 @@
 ; CHECK-NEXT:    lui a0, %hi(scratch)
 ; CHECK-NEXT:    addi a0, a0, %lo(scratch)
 ; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:  .LBB5_5: # %if.end10
+; CHECK-NEXT:    j .LBB5_6
+; CHECK-NEXT:  .LBB5_5: # %if.then4
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
+; CHECK-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
+; CHECK-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    lui a0, %hi(scratch)
+; CHECK-NEXT:    addi a0, a0, %lo(scratch)
+; CHECK-NEXT:    vse64.v v9, (a0)
+; CHECK-NEXT:  .LBB5_6: # %if.end10
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v8, v8
 ; CHECK-NEXT:    ret
@@ -337,6 +342,7 @@
 ; CHECK-NEXT:    beqz a1, .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:    j .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %if.else
 ; CHECK-NEXT:    csrr a0, vlenb
@@ -412,6 +418,7 @@
 ; CHECK-NEXT:    j .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: # %if.else
 ; CHECK-NEXT:    vfsub.vv v9, v8, v9
+; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:  .LBB7_3: # %if.end
 ; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vv v8, v9, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -377,6 +377,7 @@
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
   ; CHECK-NEXT:   [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6 /* e64 */, implicit $vtype
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVMV_X_S_M1_]]
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoRET implicit $x10
   bb.0.entry:
     successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -436,7 +437,7 @@
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr = COPY $v9
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vr = COPY $v8
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+  ; CHECK-NEXT:   [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY $x0
   ; CHECK-NEXT:   BEQ [[COPY3]], [[COPY4]], %bb.2
   ; CHECK-NEXT:   PseudoBR %bb.1
@@ -445,12 +446,14 @@
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.if.else:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLI [[PseudoVSETVLI]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.end:
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
@@ -604,10 +607,8 @@
   ; CHECK-NEXT:   [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
   ; CHECK-NEXT:   [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   BLTU [[ADDI]], [[COPY1]], %bb.1
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -674,10 +675,8 @@
   ; CHECK-NEXT:   [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
   ; CHECK-NEXT:   [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
-  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -825,9 +824,6 @@
 ...
 ---
-# FIXME: This test shows incorrect VSETVLI insertion. The VLUXEI64 needs
-# configuration for SEW=8 but it instead inherits a SEW=64 from the entry
-# block.
 name: vsetvli_vluxei64_regression
 tracksRegLiveness: true
 body: |
@@ -853,6 +849,7 @@
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   %mask:vr = PseudoVMANDN_MM_MF8 %t6, %t3, -1, 0 /* e8 */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   %t2:gpr = COPY $x0
+  ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
   ; CHECK-NEXT:   BEQ %a, %t2, %bb.3
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}