diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -407,8 +407,11 @@
   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
-                            unsigned NewOpc) const;
+  SDValue lowerFixedLengthVectorLogicOpToRVV(SDValue Op, SelectionDAG &DAG,
+                                             unsigned MaskOpc,
+                                             unsigned VecOpc) const;
+  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
+                            bool HasMask = true) const;

   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -530,6 +530,9 @@

         // Operations below are different for between masks and other vectors.
         if (VT.getVectorElementType() == MVT::i1) {
+          setOperationAction(ISD::AND, VT, Custom);
+          setOperationAction(ISD::OR, VT, Custom);
+          setOperationAction(ISD::XOR, VT, Custom);
           setOperationAction(ISD::SETCC, VT, Custom);
           continue;
         }
@@ -1205,11 +1208,14 @@
   case ISD::MUL:
     return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
   case ISD::AND:
-    return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
+                                              RISCVISD::AND_VL);
   case ISD::OR:
-    return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
+                                              RISCVISD::OR_VL);
   case ISD::XOR:
-    return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
+    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
+                                              RISCVISD::XOR_VL);
   case ISD::SDIV:
     return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
   case ISD::SREM:
@@ -2227,8 +2233,19 @@
   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
 }

+SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
+    SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
+  MVT VT = Op.getSimpleValueType();
+
+  if (VT.getVectorElementType() == MVT::i1)
+    return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
+
+  return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
+}
+
 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
-                                               unsigned NewOpc) const {
+                                               unsigned NewOpc,
+                                               bool HasMask) const {
   MVT VT = Op.getSimpleValueType();
   assert(useRVVForFixedLengthVectorVT(VT) &&
          "Only expected to lower fixed length vector operation!");
@@ -2254,7 +2271,8 @@
   SDLoc DL(Op);
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-  Ops.push_back(Mask);
+  if (HasMask)
+    Ops.push_back(Mask);
   Ops.push_back(VL);

   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -558,12 +558,38 @@
             (!cast<Instruction>("PseudoVMXOR_MM_" # mti.LMul.MX)
                  VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
-  // FIXME: Add remaining mask instructions.
+  def : Pat<(mti.Mask (riscv_vmand_vl (riscv_vmxor_vl VR:$rs1, true_mask,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMANDNOT_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmor_vl (riscv_vmxor_vl VR:$rs1, true_mask,
+                                                     (XLenVT (VLOp GPR:$vl))),
+                                     VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMORNOT_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  // XOR is associative so we need 2 patterns for VMXNOR.
+  def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmxor_vl VR:$rs1, true_mask,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      VR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+
+  def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmand_vl VR:$rs1, VR:$rs2,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      true_mask, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
   def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
                                                      (XLenVT (VLOp GPR:$vl))),
                                       true_mask, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMNOR_MM_" # mti.LMul.MX)
                  VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmxor_vl VR:$rs1, VR:$rs2,
+                                                      (XLenVT (VLOp GPR:$vl))),
+                                      true_mask, (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+                 VR:$rs1, VR:$rs2, GPR:$vl, mti.SEW)>;

   // Match the not idiom to the vnot.mm pseudo.
   def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs, true_mask,
                                       (XLenVT (VLOp GPR:$vl)))),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK
+
+define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: and_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmand.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = and <8 x i1> %a, %b
+  store <8 x i1> %c, <8 x i1>* %x
+  ret void
+}
+
+define void @or_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: or_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = or <16 x i1> %a, %b
+  store <16 x i1> %c, <16 x i1>* %x
+  ret void
+}
+
+define void @xor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xor_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, %b
+  store <32 x i1> %c, <32 x i1>* %x
+  ret void
+}
+
+define void @not_v64i1(<64 x i1>* %x, <64 x i1>* %y) {
+; CHECK-LABEL: not_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 64
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vmnand.mm v25, v25, v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %b = load <64 x i1>, <64 x i1>* %y
+  %c = xor <64 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <64 x i1> %c, <64 x i1>* %x
+  ret void
+}
+
+define void @andnot_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: andnot_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmandnot.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = xor <8 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = and <8 x i1> %b, %c
+  store <8 x i1> %d, <8 x i1>* %x
+  ret void
+}
+
+define void @ornot_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: ornot_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmornot.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = xor <16 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = or <16 x i1> %b, %c
+  store <16 x i1> %d, <16 x i1>* %x
+  ret void
+}
+
+define void @xornot_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xornot_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %d = xor <32 x i1> %b, %c
+  store <32 x i1> %d, <32 x i1>* %x
+  ret void
+}
+
+define void @nand_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: nand_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmnand.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  %b = load <8 x i1>, <8 x i1>* %y
+  %c = and <8 x i1> %a, %b
+  %d = xor <8 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <8 x i1> %d, <8 x i1>* %x
+  ret void
+}
+
+define void @nor_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: nor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  %b = load <16 x i1>, <16 x i1>* %y
+  %c = or <16 x i1> %a, %b
+  %d = xor <16 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <16 x i1> %d, <16 x i1>* %x
+  ret void
+}
+
+define void @xnor_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: xnor_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 32
+; CHECK-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vle1.v v26, (a1)
+; CHECK-NEXT:    vmxnor.mm v25, v25, v26
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  %b = load <32 x i1>, <32 x i1>* %y
+  %c = xor <32 x i1> %a, %b
+  %d = xor <32 x i1> %c, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  store <32 x i1> %d, <32 x i1>* %x
+  ret void
+}