Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -814,7 +814,146 @@
       // By default we do not custom select any intrinsic.
     default:
       break;
-    case Intrinsic::riscv_vmsgeu:
+    case Intrinsic::riscv_vmsltu: {
+      SDValue Src1 = Node->getOperand(1);
+      SDValue Src2 = Node->getOperand(2);
+      auto *C = dyn_cast<ConstantSDNode>(Src2);
+      if (!C || C->getSExtValue() != 0)
+        break;
+      // Only deal with vmsltu.vi v0, v1, 0
+      MVT Src1VT = Src1.getSimpleValueType();
+      unsigned VMSLEUOpcode;
+      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
+      default:
+        llvm_unreachable("Unexpected LMUL!");
+      case RISCVII::VLMUL::LMUL_F8:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF8;
+        break;
+      case RISCVII::VLMUL::LMUL_F4:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF4;
+        break;
+      case RISCVII::VLMUL::LMUL_F2:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF2;
+        break;
+      case RISCVII::VLMUL::LMUL_1:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M1;
+        break;
+      case RISCVII::VLMUL::LMUL_2:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M2;
+        break;
+      case RISCVII::VLMUL::LMUL_4:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M4;
+        break;
+      case RISCVII::VLMUL::LMUL_8:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M8;
+        break;
+      }
+
+      SDValue SEW = CurDAG->getTargetConstant(
+          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
+      SDValue VL;
+      selectVLOp(Node->getOperand(3), VL);
+      SDValue ZeroReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+
+      // Expand to vmsleu.vx v0, v1, x0
+      ReplaceNode(Node,
+          CurDAG->getMachineNode(VMSLEUOpcode, DL, VT, {Src1, ZeroReg, VL, SEW}));
+      return;
+    }
+    case Intrinsic::riscv_vmsltu_mask: {
+      SDValue Src1 = Node->getOperand(2);
+      SDValue Src2 = Node->getOperand(3);
+      auto *C = dyn_cast<ConstantSDNode>(Src2);
+      if (!C || C->getSExtValue() != 0)
+        break;
+      // Only deal with vmsltu.vi v1, v2, 0, v0.t
+      MVT Src1VT = Src1.getSimpleValueType();
+      unsigned VMSLEUOpcode;
+      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
+      default:
+        llvm_unreachable("Unexpected LMUL!");
+      case RISCVII::VLMUL::LMUL_F8:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF8_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_F4:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF4_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_F2:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_MF2_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_1:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M1_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_2:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M2_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_4:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M4_MASK;
+        break;
+      case RISCVII::VLMUL::LMUL_8:
+        VMSLEUOpcode = RISCV::PseudoVMSLEU_VX_M8_MASK;
+        break;
+      }
+
+      SDValue SEW = CurDAG->getTargetConstant(
+          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
+      SDValue VL;
+      selectVLOp(Node->getOperand(5), VL);
+      SDValue MaskedOff = Node->getOperand(1);
+      SDValue Mask = Node->getOperand(4);
+
+      // Expand to vmsleu.vx v1, v2, x0, v0.t
+      ReplaceNode(Node, CurDAG->getMachineNode(VMSLEUOpcode, DL, VT,
+          {MaskedOff, Src1, CurDAG->getRegister(RISCV::X0, XLenVT), Mask, VL, SEW}));
+      return;
+    }
+    case Intrinsic::riscv_vmsgeu: {
+      SDValue Src1 = Node->getOperand(1);
+      SDValue Src2 = Node->getOperand(2);
+      auto *C = dyn_cast<ConstantSDNode>(Src2);
+      // Only handle vmsgeu.vi v0, v1, 0
+      if (C && C->getSExtValue() == 0) {
+        MVT Src1VT = Src1.getSimpleValueType();
+        unsigned VMSetOpcode;
+        switch (RISCVTargetLowering::getLMUL(Src1VT)) {
+        default:
+          llvm_unreachable("Unexpected LMUL!");
+        case RISCVII::VLMUL::LMUL_F8:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B64;
+          break;
+        case RISCVII::VLMUL::LMUL_F4:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B32;
+          break;
+        case RISCVII::VLMUL::LMUL_F2:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B16;
+          break;
+        case RISCVII::VLMUL::LMUL_1:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B8;
+          break;
+        case RISCVII::VLMUL::LMUL_2:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B4;
+          break;
+        case RISCVII::VLMUL::LMUL_4:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B2;
+          break;
+        case RISCVII::VLMUL::LMUL_8:
+          VMSetOpcode = RISCV::PseudoVMSET_M_B1;
+          break;
+        }
+
+        SDValue SEW = CurDAG->getTargetConstant(
+            Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
+        SDValue VL;
+        selectVLOp(Node->getOperand(3), VL);
+
+        // Expand to vmset.m v0
+        ReplaceNode(Node,
+            CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
+        return;
+      }
+      // FALL THROUGH.
+      LLVM_FALLTHROUGH;
+    }
     case Intrinsic::riscv_vmsge: {
       SDValue Src1 = Node->getOperand(1);
       SDValue Src2 = Node->getOperand(2);
@@ -887,18 +1026,23 @@
     case Intrinsic::riscv_vmsge_mask: {
       SDValue Src1 = Node->getOperand(2);
       SDValue Src2 = Node->getOperand(3);
+      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
+      bool IsCmpUnsignedZero = false;
       // Only custom select scalar second operand.
       if (Src2.getValueType() != XLenVT)
         break;
       // Small constants are handled with patterns.
       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
         int64_t CVal = C->getSExtValue();
-        if (CVal >= -15 && CVal <= 16)
-          break;
+        if (CVal >= -15 && CVal <= 16) {
+          if (IsUnsigned && CVal == 0)
+            IsCmpUnsignedZero = true;
+          else
+            break;
+        }
       }
-      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
       MVT Src1VT = Src1.getSimpleValueType();
-      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode;
+      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, VMSetOpcode;
       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
       default:
         llvm_unreachable("Unexpected LMUL!");
@@ -907,42 +1051,49 @@
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF8 : RISCV::PseudoVMSLT_VX_MF8;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF8_MASK
                                      : RISCV::PseudoVMSLT_VX_MF8_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B64;
         break;
       case RISCVII::VLMUL::LMUL_F4:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF4 : RISCV::PseudoVMSLT_VX_MF4;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF4_MASK
                                      : RISCV::PseudoVMSLT_VX_MF4_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B32;
         break;
       case RISCVII::VLMUL::LMUL_F2:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF2 : RISCV::PseudoVMSLT_VX_MF2;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF2_MASK
                                      : RISCV::PseudoVMSLT_VX_MF2_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B16;
         break;
       case RISCVII::VLMUL::LMUL_1:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_M1 : RISCV::PseudoVMSLT_VX_M1;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M1_MASK
                                      : RISCV::PseudoVMSLT_VX_M1_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B8;
         break;
       case RISCVII::VLMUL::LMUL_2:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_M2 : RISCV::PseudoVMSLT_VX_M2;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M2_MASK
                                      : RISCV::PseudoVMSLT_VX_M2_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B4;
         break;
       case RISCVII::VLMUL::LMUL_4:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_M4 : RISCV::PseudoVMSLT_VX_M4;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M4_MASK
                                      : RISCV::PseudoVMSLT_VX_M4_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B2;
         break;
       case RISCVII::VLMUL::LMUL_8:
         VMSLTOpcode =
           IsUnsigned ? RISCV::PseudoVMSLTU_VX_M8 : RISCV::PseudoVMSLT_VX_M8;
         VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M8_MASK
                                      : RISCV::PseudoVMSLT_VX_M8_MASK;
+        VMSetOpcode = RISCV::PseudoVMSET_M_B1;
         break;
       }
       // Mask operations use the LMUL from the mask type.
@@ -985,6 +1136,15 @@
       selectVLOp(Node->getOperand(5), VL);
       SDValue MaskedOff = Node->getOperand(1);
       SDValue Mask = Node->getOperand(4);
+      // Expand vmsgeu.vi vd, va, 0, v0.t to vmset.m vt; vmandn.mm vd, v0, vt
+      if (IsCmpUnsignedZero) {
+        SDValue VmSet = SDValue(
+            CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW),
+            0);
+        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
+                                                 {Mask, VmSet, VL, MaskSEW}));
+        return;
+      }
       // If the MaskedOff value and the Mask are the same value use
       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
       // This avoids needing to copy v0 to vd before starting the next sequence.
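For reference, a minimal IR reproducer for the new unmasked riscv_vmsgeu path (an editorial sketch, not part of this diff: the function name is made up, and the intrinsic signature of vector operand, scalar operand, and XLEN-wide VL is assumed to match the declarations in the tests below; run through llc for a riscv32 target with the V extension enabled). Since no unsigned value is below zero, x >=u 0 holds for every lane, which is why the compare can be selected to a bare vmset.m:

declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i16.i16(<vscale x 4 x i16>, i16, i32)

define <vscale x 4 x i1> @vmsgeu_vi_zero(<vscale x 4 x i16> %va, i32 %vl) {
entry:
  ; x >=u 0 is true for every element, so this now selects to vmset.m.
  %m = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i16.i16(
      <vscale x 4 x i16> %va, i16 0, i32 %vl)
  ret <vscale x 4 x i1> %m
}

This mirrors the existing intrinsic_vmsgeu_vi_nxv4i16_i16 test, whose expected output changes from vmseq.vv v0, v8, v8 to vmset.m v0 below.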
Index: llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
@@ -2097,11 +2097,10 @@
 define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmseq.vv v10, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmset.m v8
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v9, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16.i16(
@@ -2118,7 +2117,7 @@
 ; CHECK-LABEL: intrinsic_vmsgeu_vi_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmseq.vv v0, v8, v8
+; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i16.i16(
Index: llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
@@ -2064,11 +2064,10 @@
 define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmseq.vv v10, v8, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmset.m v8
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v9, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16.i16(
@@ -2085,7 +2084,7 @@
 ; CHECK-LABEL: intrinsic_vmsgeu_vi_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmseq.vv v0, v8, v8
+; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i16.i16(
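Likewise, an editorial sketch of the unmasked riscv_vmsltu case exercised next (same assumptions: made-up function name, intrinsic signature inferred from the test declarations). For unsigned elements, x <u 0 can never be true; the updated CHECK lines below expect this call to select to vmsleu.vx against x0 instead of the previous vmsne.vv form:

declare <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i16.i16(<vscale x 4 x i16>, i16, i32)

define <vscale x 4 x i1> @vmsltu_vi_zero(<vscale x 4 x i16> %va, i32 %vl) {
entry:
  ; Immediate 0 routes this call into the new riscv_vmsltu special case
  ; added in RISCVISelDAGToDAG.cpp above.
  %m = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i16.i16(
      <vscale x 4 x i16> %va, i16 0, i32 %vl)
  ret <vscale x 4 x i1> %m
}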
Index: llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
@@ -2085,7 +2085,7 @@
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsne.vv v10, v8, v8, v0.t
+; CHECK-NEXT:    vmsleu.vx v10, v8, zero, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -2103,7 +2103,7 @@
 ; CHECK-LABEL: intrinsic_vmsltu_vi_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmsne.vv v0, v8, v8
+; CHECK-NEXT:    vmsleu.vx v0, v8, zero
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i16.i16(
Index: llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
@@ -2049,7 +2049,7 @@
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsne.vv v10, v8, v8, v0.t
+; CHECK-NEXT:    vmsleu.vx v10, v8, zero, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -2067,7 +2067,7 @@
 ; CHECK-LABEL: intrinsic_vmsltu_vi_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmsne.vv v0, v8, v8
+; CHECK-NEXT:    vmsleu.vx v0, v8, zero
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i16.i16(