diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -524,10 +524,16 @@
         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
 
         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 
         setOperationAction(ISD::LOAD, VT, Custom);
         setOperationAction(ISD::STORE, VT, Custom);
+
+        // Operations below are not valid for masks.
+        if (VT.getVectorElementType() == MVT::i1)
+          continue;
+
+        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+
         setOperationAction(ISD::ADD, VT, Custom);
         setOperationAction(ISD::MUL, VT, Custom);
         setOperationAction(ISD::SUB, VT, Custom);
@@ -774,23 +780,26 @@
   unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
   assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
 
-  switch (VT.getVectorElementType().SimpleTy) {
+  MVT EltVT = VT.getVectorElementType();
+  switch (EltVT.SimpleTy) {
   default:
     llvm_unreachable("unexpected element type for RVV container");
+  case MVT::i1: {
+    // Masks are calculated assuming 8-bit elements since that's when we need
+    // the most elements.
+    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
+    return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
+  }
   case MVT::i8:
-    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
   case MVT::i16:
-    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
   case MVT::i32:
-    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
   case MVT::i64:
-    return MVT::getScalableVectorVT(MVT::i64, LMul);
   case MVT::f16:
-    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
   case MVT::f32:
-    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
-  case MVT::f64:
-    return MVT::getScalableVectorVT(MVT::f64, LMul);
+  case MVT::f64: {
+    unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
+    return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
+  }
   }
 }
@@ -829,6 +838,20 @@
   SDValue VL =
       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
 
+  if (VT.getVectorElementType() == MVT::i1) {
+    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
+      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
+      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
+    }
+
+    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
+    }
+
+    return SDValue();
+  }
+
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
@@ -2032,6 +2055,10 @@
   SDLoc DL(Op);
   MVT VT = Store->getValue().getSimpleValueType();
+
+  // FIXME: We probably need to zero any extra bits in a byte for mask stores.
+  // This is tricky to do.
+
   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   SDValue VL =
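
(Reviewer note, not part of the patch: a minimal standalone sketch of the container mapping the getContainerForFixedLengthVector hunk above implements, assuming RISCV::RVVBitsPerBlock is 64. The helper and names below are local to this sketch, not LLVM APIs; they only mirror the arithmetic, e.g. a v32i1 at LMUL 2 lands in nxv16i1 because mask containers are sized as if SEW were 8.)

// Standalone sketch (not LLVM code); mirrors the container math above.
#include <cassert>

constexpr unsigned RVVBitsPerBlock = 64; // assumed value of RISCV::RVVBitsPerBlock

// Minimum element count N of the scalable container <vscale x N x elt>.
unsigned containerMinElts(unsigned EltBits, unsigned LMul, bool IsMask) {
  // Masks are sized for SEW=8, the element width that needs the most mask bits.
  unsigned EltsPerBlock = RVVBitsPerBlock / (IsMask ? 8 : EltBits);
  return LMul * EltsPerBlock;
}

int main() {
  assert(containerMinElts(/*EltBits=*/1, /*LMul=*/2, /*IsMask=*/true) == 16);  // v32i1 -> nxv16i1
  assert(containerMinElts(/*EltBits=*/1, /*LMul=*/1, /*IsMask=*/true) == 8);   // v16i1 -> nxv8i1
  assert(containerMinElts(/*EltBits=*/32, /*LMul=*/1, /*IsMask=*/false) == 2); // v4i32 -> nxv2i32
  return 0;
}
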
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3611,7 +3611,7 @@
 defm PseudoVMORNOT: VPseudoBinaryM_MM;
 defm PseudoVMXNOR: VPseudoBinaryM_MM;
 
-// Pseudo insturctions
+// Pseudo instructions
 defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
 defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -192,10 +192,21 @@
   def : Pat<(vti.Vector (riscv_vle_vl RVVBaseAddr:$rs1, (XLenVT (VLOp GPR:$vl)))),
             (load_instr RVVBaseAddr:$rs1, GPR:$vl, vti.SEW)>;
   // Store
-  def : Pat<(riscv_vse_vl (vti.Vector vti.RegClass:$rs2), RVVBaseAddr:$rs1, (XLenVT (VLOp GPR:$vl))),
+  def : Pat<(riscv_vse_vl (vti.Vector vti.RegClass:$rs2), RVVBaseAddr:$rs1,
+                          (XLenVT (VLOp GPR:$vl))),
             (store_instr vti.RegClass:$rs2, RVVBaseAddr:$rs1, GPR:$vl, vti.SEW)>;
 }
+
+foreach mti = AllMasks in {
+  defvar load_instr = !cast<Instruction>("PseudoVLE1_V_"#mti.BX);
+  defvar store_instr = !cast<Instruction>("PseudoVSE1_V_"#mti.BX);
+  def : Pat<(mti.Mask (riscv_vle_vl RVVBaseAddr:$rs1, (XLenVT (VLOp GPR:$vl)))),
+            (load_instr RVVBaseAddr:$rs1, GPR:$vl, mti.SEW)>;
+  def : Pat<(riscv_vse_vl (mti.Mask VR:$rs2), RVVBaseAddr:$rs1,
+                          (XLenVT (VLOp GPR:$vl))),
+            (store_instr VR:$rs2, RVVBaseAddr:$rs1, GPR:$vl, mti.SEW)>;
+}
+
 // 12.1. Vector Single-Width Integer Add and Subtract
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
 defm "" : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
@@ -267,6 +278,18 @@
 } // Predicates = [HasStdExtV, HasStdExtF]
 
+// 16.1. Vector Mask-Register Logical Instructions
+let Predicates = [HasStdExtV] in {
+
+foreach mti = AllMasks in {
+  def : Pat<(mti.Mask (riscv_vmset_vl (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMSET_M_" # mti.BX) GPR:$vl, mti.SEW)>;
+  def : Pat<(mti.Mask (riscv_vmclr_vl (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVMCLR_M_" # mti.BX) GPR:$vl, mti.SEW)>;
+}
+
+} // Predicates = [HasStdExtV]
+
 // 17.4. Vector Register GAther Instruction
 let Predicates = [HasStdExtV] in {
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -144,5 +144,12 @@
 unsigned RISCVSubtarget::getLMULForFixedLengthVector(MVT VT) const {
   unsigned MinVLen = getMinRVVVectorSizeInBits();
+
+  // Masks only occupy a single register. An LMUL==1 operation can only use
+  // at most 1/8 of the register. Only an LMUL==8 operation on i8 types can
+  // use the whole register.
+  if (VT.getVectorElementType() == MVT::i1)
+    MinVLen /= 8;
+
   return divideCeil(VT.getSizeInBits(), MinVLen);
 }
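
(Reviewer note, not part of the patch: a quick standalone check of the LMUL arithmetic above, assuming the 128-bit minimum VLEN the tests below use via -riscv-v-vector-bits-min=128. The function and parameter names below are local to this sketch, not LLVM APIs.)

// Standalone sketch (not LLVM code); mirrors getLMULForFixedLengthVector above.
#include <cassert>

unsigned divideCeil(unsigned Numerator, unsigned Denominator) {
  return (Numerator + Denominator - 1) / Denominator;
}

unsigned lmulForFixedLengthVector(unsigned VTSizeInBits, bool IsMask,
                                  unsigned MinVLen = 128) {
  // At LMUL==1 an operation touches at most VLEN/8 elements (the SEW=8 case),
  // so a mask only ever needs that many bits of its register.
  if (IsMask)
    MinVLen /= 8;
  return divideCeil(VTSizeInBits, MinVLen);
}

int main() {
  assert(lmulForFixedLengthVector(16, /*IsMask=*/true) == 1);   // v16i1 -> m1, as in the e8,m1 tests
  assert(lmulForFixedLengthVector(32, /*IsMask=*/true) == 2);   // v32i1 -> m2, as in the LMULMAX2 tests
  assert(lmulForFixedLengthVector(128, /*IsMask=*/false) == 1); // v4i32 -> m1
  return 0;
}
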
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+
+define void @load_store_v1i1(<1 x i1>* %x, <1 x i1>* %y) {
+; CHECK-LABEL: load_store_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 1
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vse1.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <1 x i1>, <1 x i1>* %x
+  store <1 x i1> %a, <1 x i1>* %y
+  ret void
+}
+
+define void @load_store_v2i1(<2 x i1>* %x, <2 x i1>* %y) {
+; CHECK-LABEL: load_store_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 2
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vse1.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <2 x i1>, <2 x i1>* %x
+  store <2 x i1> %a, <2 x i1>* %y
+  ret void
+}
+
+define void @load_store_v4i1(<4 x i1>* %x, <4 x i1>* %y) {
+; CHECK-LABEL: load_store_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vse1.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x i1>, <4 x i1>* %x
+  store <4 x i1> %a, <4 x i1>* %y
+  ret void
+}
+
+define void @load_store_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: load_store_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vse1.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <8 x i1>, <8 x i1>* %x
+  store <8 x i1> %a, <8 x i1>* %y
+  ret void
+}
+
+define void @load_store_v16i1(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: load_store_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 16
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v25, (a0)
+; CHECK-NEXT:    vse1.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <16 x i1>, <16 x i1>* %x
+  store <16 x i1> %a, <16 x i1>* %y
+  ret void
+}
+
+define void @load_store_v32i1(<32 x i1>* %x, <32 x i1>* %y) {
+; LMULMAX2-LABEL: load_store_v32i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: load_store_v32i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    lw a0, 0(a0)
+; LMULMAX1-RV32-NEXT:    sw a0, 0(a1)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: load_store_v32i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    lw a0, 0(a0)
+; LMULMAX1-RV64-NEXT:    sw a0, 0(a1)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = load <32 x i1>, <32 x i1>* %x
+  store <32 x i1> %a, <32 x i1>* %y
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+
+define void @splat_ones_v1i1(<1 x i1>* %x) {
+; CHECK-LABEL: splat_ones_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmset.m v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <1 x i1> <i1 1>, <1 x i1>* %x
+  ret void
+}
+
+define void @splat_zeros_v2i1(<2 x i1>* %x) {
+; CHECK-LABEL: splat_zeros_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmclr.m v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <2 x i1> zeroinitializer, <2 x i1>* %x
+  ret void
+}
+
+define void @splat_ones_v4i1(<4 x i1>* %x) {
+; CHECK-LABEL: splat_ones_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmset.m v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i1>* %x
+  ret void
+}
+
+define void @splat_zeros_v8i1(<8 x i1>* %x) {
+; CHECK-LABEL: splat_zeros_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmclr.m v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i1> zeroinitializer, <8 x i1>* %x
+  ret void
+}
+
+define void @splat_ones_v16i1(<16 x i1>* %x) {
+; CHECK-LABEL: splat_ones_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vmset.m v25
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  store <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i1>* %x
+  ret void
+}
+
+define void @splat_zeros_v32i1(<32 x i1>* %x) {
+; LMULMAX2-LABEL: splat_zeros_v32i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmclr.m v25
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_zeros_v32i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV32-NEXT:    addi a2, zero, 16
+; LMULMAX1-RV32-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmclr.m v25
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_zeros_v32i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    addi a2, zero, 16
+; LMULMAX1-RV64-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmclr.m v25
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  store <32 x i1> zeroinitializer, <32 x i1>* %x
+  ret void
+}
+
+define void @splat_ones_v64i1(<64 x i1>* %x) {
+; LMULMAX2-LABEL: splat_ones_v64i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, a0, 4
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmset.m v25
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV32-NEXT:    addi a2, zero, 16
+; LMULMAX1-RV32-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmset.m v25
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vse1.v v25, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV64-NEXT:    addi a2, zero, 16
+; LMULMAX1-RV64-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmset.m v25
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v25, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <64 x i1>* %x
+  ret void
+}