diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2672,7 +2672,16 @@
     // Need True has same VL with N.
     unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
     SDValue TrueVL = True.getOperand(TrueVLIndex);
-    if (TrueVL != VL)
+
+    auto IsNoFPExcept = [this](SDValue N) {
+      return !this->mayRaiseFPException(N.getNode()) ||
+             N->getFlags().hasNoFPExcept();
+    };
+
+    // Allow the peephole for non-exception True with VLMAX vector length, since
+    // all the values after VL of N are dependent on Merge. VLMAX should be
+    // lowered to (XLenVT -1).
+    if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
       continue;
 
     SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -602,3 +602,199 @@
   %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
   ret <vscale x 2 x float> %b
 }
+
+; Test vector operations with VLMAX vector length.
+
+; Test binary operator with vp.merge and add.
+define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_add:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_add
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9
+  ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_MASK]]
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = add <vscale x 2 x i32> %x, %y
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test binary operator with vp.merge and fadd.
+define <vscale x 2 x float> @vpmerge_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_fadd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_fadd
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9
+  ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: %5:vrnov0 = nofpexcept PseudoVFADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0, implicit $frm
+  ; MIR-NEXT: $v8 = COPY %5
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = fadd <vscale x 2 x float> %x, %y
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test conversion by fptosi.
+define <vscale x 2 x i16> @vpmerge_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_fptosi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_fptosi
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_RTZ_X_F_W_MF2_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 4 /* e16 */, 0
+  ; MIR-NEXT: $v8 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
+  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
+  ret <vscale x 2 x i16> %b
+}
+
+; Test conversion by sitofp.
+define <vscale x 2 x float> @vpmerge_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_sitofp:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_sitofp
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_F_X_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test float extension by fpext.
+define <vscale x 2 x double> @vpmerge_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_fpext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_fpext
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8m2, $v10, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrm2nov0 = COPY $v8m2
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrm2nov0 = PseudoVFWCVT_F_F_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8m2 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8m2
+  %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
+  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
+  ret <vscale x 2 x double> %b
+}
+
+; Test float truncation by fptrunc.
+define <vscale x 2 x float> @vpmerge_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_fptrunc:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_fptrunc
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_F_F_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test integer extension by zext.
+define <vscale x 2 x i32> @vpmerge_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_zext
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVZEXT_VF4_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test integer truncation by trunc.
+define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_trunc:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT: ret
+  ; MIR-LABEL: name: vpmerge_trunc
+  ; MIR: bb.0 (%ir-block.0):
+  ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10
+  ; MIR-NEXT: {{ $}}
+  ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+  ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
+  ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2
+  ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; MIR-NEXT: $v0 = COPY [[COPY1]]
+  ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVNSRL_WI_M1_MASK [[COPY3]], [[COPY2]], 0, $v0, [[COPY]], 5 /* e32 */, 0
+  ; MIR-NEXT: $v8 = COPY %4
+  ; MIR-NEXT: PseudoRET implicit $v8
+  %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -964,10 +964,8 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, mu
-; RV32-NEXT: vlse64.v v9, (a0), zero
 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu
-; RV32-NEXT: vmerge.vvm v8, v8, v9, v0
+; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -1015,10 +1013,8 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu
-; RV32-NEXT: vlse64.v v10, (a0), zero
 ; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu
-; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
+; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -1066,10 +1062,8 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, mu
-; RV32-NEXT: vlse64.v v12, (a0), zero
 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu
-; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
+; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -1117,10 +1111,8 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV32-NEXT: vlse64.v v16, (a0), zero
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu
-; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;