diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -164,6 +164,9 @@
   VMCLR_VL,
   VMSET_VL,
 
+  // Matches the semantics of vrgather.vx with an extra operand for VL.
+  VRGATHER_VX_VL,
+
   // Memory opcodes start here.
   VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
   VSE_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -521,6 +521,7 @@
         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
 
         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 
         setOperationAction(ISD::LOAD, VT, Custom);
         setOperationAction(ISD::STORE, VT, Custom);
@@ -551,6 +552,7 @@
         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
 
         setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 
         setOperationAction(ISD::LOAD, VT, Custom);
         setOperationAction(ISD::STORE, VT, Custom);
@@ -834,6 +836,36 @@
   return SDValue();
 }
 
+static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+                                   const RISCVSubtarget &Subtarget) {
+  SDValue V1 = Op.getOperand(0);
+  //SDValue V2 = Op.getOperand(1);
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+  if (SVN->isSplat()) {
+    int Lane = SVN->getSplatIndex();
+    if (Lane >= 0) {
+      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");
+
+      MVT XLenVT = Subtarget.getXLenVT();
+      SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+      SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
+                                   V1, DAG.getConstant(Lane, DL, XLenVT),
+                                   Mask, VL);
+      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -1080,6 +1112,8 @@
     return lowerVECREDUCE(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+  case ISD::VECTOR_SHUFFLE:
+    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
   case ISD::LOAD:
     return lowerFixedLengthVectorLoadToRVV(Op, DAG);
   case ISD::STORE:
@@ -4573,6 +4607,7 @@
   NODE_NAME_CASE(FMA_VL)
   NODE_NAME_CASE(VMCLR_VL)
   NODE_NAME_CASE(VMSET_VL)
+  NODE_NAME_CASE(VRGATHER_VX_VL)
   NODE_NAME_CASE(VLE_VL)
   NODE_NAME_CASE(VSE_VL)
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -79,6 +79,14 @@
                                            SDTCisVT<5, XLenVT>]>;
 def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL>;
 
+def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
+                                  SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                                       SDTCisSameAs<0, 1>,
+                                                       SDTCisVT<2, XLenVT>,
+                                                       SDTCVecEltisVT<3, i1>,
+                                                       SDTCisSameNumEltsAs<0, 3>,
+                                                       SDTCisVT<4, XLenVT>]>>;
+
 def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
                                                 SDTCVecEltisVT<0, i1>,
                                                 SDTCisVT<1, XLenVT>]>;
@@ -209,3 +217,38 @@
   }
 
 } // Predicates = [HasStdExtV, HasStdExtF]
+
+// 17.4. Vector Register Gather Instruction
+let Predicates = [HasStdExtV] in {
+
+foreach vti = AllIntegerVectors in {
+  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
+}
+
+} // Predicates = [HasStdExtV]
+
+let Predicates = [HasStdExtV, HasStdExtF] in {
+
+foreach vti = AllFloatVectors in {
+  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
+}
+
+} // Predicates = [HasStdExtV, HasStdExtF]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @gather_const_v8f16(<8 x half>* %x) {
+; CHECK-LABEL: gather_const_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 5
+; CHECK-NEXT:    vse16.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x half>, <8 x half>* %x
+  %b = extractelement <8 x half> %a, i32 5
+  %c = insertelement <8 x half> undef, half %b, i32 0
+  %d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
+  store <8 x half> %d, <8 x half>* %x
+  ret void
+}
+
+define void @gather_const_v4f32(<4 x float>* %x) {
+; CHECK-LABEL: gather_const_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 2
+; CHECK-NEXT:    vse32.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x float>, <4 x float>* %x
+  %b = extractelement <4 x float> %a, i32 2
+  %c = insertelement <4 x float> undef, float %b, i32 0
+  %d = shufflevector <4 x float> %c, <4 x float> undef, <4 x i32> zeroinitializer
+  store <4 x float> %d, <4 x float>* %x
+  ret void
+}
+
+define void @gather_const_v2f64(<2 x double>* %x) {
+; CHECK-LABEL: gather_const_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 0
+; CHECK-NEXT:    vse64.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, <2 x double>* %x
+  %b = extractelement <2 x double> %a, i32 0
+  %c = insertelement <2 x double> undef, double %b, i32 0
+  %d = shufflevector <2 x double> %c, <2 x double> undef, <2 x i32> zeroinitializer
+  store <2 x double> %d, <2 x double>* %x
+  ret void
+}
+
+define void @gather_const_v64f16(<64 x half>* %x) {
+; LMULMAX8-LABEL: gather_const_v64f16:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a1, zero, 64
+; LMULMAX8-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
+; LMULMAX8-NEXT:    vle16.v v8, (a0)
+; LMULMAX8-NEXT:    addi a1, zero, 47
+; LMULMAX8-NEXT:    vrgather.vx v16, v8, a1
+; LMULMAX8-NEXT:    vse16.v v16, (a0)
+; LMULMAX8-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v64f16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 80
+; LMULMAX1-NEXT:    addi a2, zero, 8
+; LMULMAX1-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vle16.v v25, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 16
+; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a4, a0, 32
+; LMULMAX1-NEXT:    addi a5, a0, 64
+; LMULMAX1-NEXT:    addi a2, a0, 112
+; LMULMAX1-NEXT:    addi a3, a0, 96
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 7
+; LMULMAX1-NEXT:    vse16.v v26, (a3)
+; LMULMAX1-NEXT:    vse16.v v26, (a2)
+; LMULMAX1-NEXT:    vse16.v v26, (a5)
+; LMULMAX1-NEXT:    vse16.v v26, (a1)
+; LMULMAX1-NEXT:    vse16.v v26, (a4)
+; LMULMAX1-NEXT:    vse16.v v26, (a7)
+; LMULMAX1-NEXT:    vse16.v v26, (a0)
+; LMULMAX1-NEXT:    vse16.v v26, (a6)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x half>, <64 x half>* %x
+  %b = extractelement <64 x half> %a, i32 47
+  %c = insertelement <64 x half> undef, half %b, i32 0
+  %d = shufflevector <64 x half> %c, <64 x half> undef, <64 x i32> zeroinitializer
+  store <64 x half> %d, <64 x half>* %x
+  ret void
+}
+
+define void @gather_const_v32f32(<32 x float>* %x) {
+; LMULMAX8-LABEL: gather_const_v32f32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a1, zero, 32
+; LMULMAX8-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
+; LMULMAX8-NEXT:    vle32.v v8, (a0)
+; LMULMAX8-NEXT:    vrgather.vi v16, v8, 17
+; LMULMAX8-NEXT:    vse32.v v16, (a0)
+; LMULMAX8-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v32f32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 64
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 16
+; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a4, a0, 32
+; LMULMAX1-NEXT:    addi a5, a0, 80
+; LMULMAX1-NEXT:    addi a2, a0, 112
+; LMULMAX1-NEXT:    addi a3, a0, 96
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 1
+; LMULMAX1-NEXT:    vse32.v v26, (a3)
+; LMULMAX1-NEXT:    vse32.v v26, (a2)
+; LMULMAX1-NEXT:    vse32.v v26, (a1)
+; LMULMAX1-NEXT:    vse32.v v26, (a5)
+; LMULMAX1-NEXT:    vse32.v v26, (a4)
+; LMULMAX1-NEXT:    vse32.v v26, (a7)
+; LMULMAX1-NEXT:    vse32.v v26, (a0)
+; LMULMAX1-NEXT:    vse32.v v26, (a6)
+; LMULMAX1-NEXT:    ret
+  %a = load <32 x float>, <32 x float>* %x
+  %b = extractelement <32 x float> %a, i32 17
+  %c = insertelement <32 x float> undef, float %b, i32 0
+  %d = shufflevector <32 x float> %c, <32 x float> undef, <32 x i32> zeroinitializer
+  store <32 x float> %d, <32 x float>* %x
+  ret void
+}
+
+define void @gather_const_v16f64(<16 x double>* %x) {
+; LMULMAX8-LABEL: gather_const_v16f64:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a1, zero, 16
+; LMULMAX8-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
+; LMULMAX8-NEXT:    vle64.v v8, (a0)
+; LMULMAX8-NEXT:    vrgather.vi v16, v8, 10
+; LMULMAX8-NEXT:    vse64.v v16, (a0)
+; LMULMAX8-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v16f64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 80
+; LMULMAX1-NEXT:    addi a2, zero, 2
+; LMULMAX1-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 16
+; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a4, a0, 32
+; LMULMAX1-NEXT:    addi a5, a0, 64
+; LMULMAX1-NEXT:    addi a2, a0, 112
+; LMULMAX1-NEXT:    addi a3, a0, 96
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 0
+; LMULMAX1-NEXT:    vse64.v v26, (a3)
+; LMULMAX1-NEXT:    vse64.v v26, (a2)
+; LMULMAX1-NEXT:    vse64.v v26, (a5)
+; LMULMAX1-NEXT:    vse64.v v26, (a1)
+; LMULMAX1-NEXT:    vse64.v v26, (a4)
+; LMULMAX1-NEXT:    vse64.v v26, (a7)
+; LMULMAX1-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-NEXT:    vse64.v v26, (a6)
+; LMULMAX1-NEXT:    ret
+  %a = load <16 x double>, <16 x double>* %x
+  %b = extractelement <16 x double> %a, i32 10
+  %c = insertelement <16 x double> undef, double %b, i32 0
+  %d = shufflevector <16 x double> %c, <16 x double> undef, <16 x i32> zeroinitializer
+  store <16 x double> %d, <16 x double>* %x
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @gather_const_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: gather_const_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 16
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 12
+; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = extractelement <16 x i8> %a, i32 12
+  %c = insertelement <16 x i8> undef, i8 %b, i32 0
+  %d = shufflevector <16 x i8> %c, <16 x i8> undef, <16 x i32> zeroinitializer
+  store <16 x i8> %d, <16 x i8>* %x
+  ret void
+}
+
+define void @gather_const_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: gather_const_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 8
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 5
+; CHECK-NEXT:    vse16.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = extractelement <8 x i16> %a, i32 5
+  %c = insertelement <8 x i16> undef, i16 %b, i32 0
+  %d = shufflevector <8 x i16> %c, <8 x i16> undef, <8 x i32> zeroinitializer
+  store <8 x i16> %d, <8 x i16>* %x
+  ret void
+}
+
+define void @gather_const_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: gather_const_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 4
+; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 3
+; CHECK-NEXT:    vse32.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = extractelement <4 x i32> %a, i32 3
+  %c = insertelement <4 x i32> undef, i32 %b, i32 0
+  %d = shufflevector <4 x i32> %c, <4 x i32> undef, <4 x i32> zeroinitializer
+  store <4 x i32> %d, <4 x i32>* %x
+  ret void
+}
+
+define void @gather_const_v2i64(<2 x i64>* %x) {
+; CHECK-LABEL: gather_const_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 2
+; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vrgather.vi v26, v25, 1
+; CHECK-NEXT:    vse64.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i64>, <2 x i64>* %x
+  %b = extractelement <2 x i64> %a, i32 1
+  %c = insertelement <2 x i64> undef, i64 %b, i32 0
+  %d = shufflevector <2 x i64> %c, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %d, <2 x i64>* %x
+  ret void
+}
+
+define void @gather_const_v64i8(<64 x i8>* %x) {
+; LMULMAX4-LABEL: gather_const_v64i8:
+; LMULMAX4:       # %bb.0:
+; LMULMAX4-NEXT:    addi a1, zero, 64
+; LMULMAX4-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; LMULMAX4-NEXT:    vle8.v v28, (a0)
+; LMULMAX4-NEXT:    addi a1, zero, 32
+; LMULMAX4-NEXT:    vrgather.vx v8, v28, a1
+; LMULMAX4-NEXT:    vse8.v v8, (a0)
+; LMULMAX4-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v64i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 32
+; LMULMAX1-NEXT:    addi a2, zero, 16
+; LMULMAX1-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle8.v v25, (a1)
+; LMULMAX1-NEXT:    addi a2, a0, 16
+; LMULMAX1-NEXT:    addi a3, a0, 48
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 0
+; LMULMAX1-NEXT:    vse8.v v26, (a1)
+; LMULMAX1-NEXT:    vse8.v v26, (a3)
+; LMULMAX1-NEXT:    vse8.v v26, (a0)
+; LMULMAX1-NEXT:    vse8.v v26, (a2)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i8>, <64 x i8>* %x
+  %b = extractelement <64 x i8> %a, i32 32
+  %c = insertelement <64 x i8> undef, i8 %b, i32 0
+  %d = shufflevector <64 x i8> %c, <64 x i8> undef, <64 x i32> zeroinitializer
+  store <64 x i8> %d, <64 x i8>* %x
+  ret void
+}
+
+define void @gather_const_v16i16(<32 x i16>* %x) {
+; LMULMAX4-LABEL: gather_const_v16i16:
+; LMULMAX4:       # %bb.0:
+; LMULMAX4-NEXT:    addi a1, zero, 32
+; LMULMAX4-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
+; LMULMAX4-NEXT:    vle16.v v28, (a0)
+; LMULMAX4-NEXT:    vrgather.vi v8, v28, 25
+; LMULMAX4-NEXT:    vse16.v v8, (a0)
+; LMULMAX4-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 48
+; LMULMAX1-NEXT:    addi a2, zero, 8
+; LMULMAX1-NEXT:    vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-NEXT:    vle16.v v25, (a1)
+; LMULMAX1-NEXT:    addi a2, a0, 16
+; LMULMAX1-NEXT:    addi a3, a0, 32
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 1
+; LMULMAX1-NEXT:    vse16.v v26, (a3)
+; LMULMAX1-NEXT:    vse16.v v26, (a1)
+; LMULMAX1-NEXT:    vse16.v v26, (a0)
+; LMULMAX1-NEXT:    vse16.v v26, (a2)
+; LMULMAX1-NEXT:    ret
+  %a = load <32 x i16>, <32 x i16>* %x
+  %b = extractelement <32 x i16> %a, i32 25
+  %c = insertelement <32 x i16> undef, i16 %b, i32 0
+  %d = shufflevector <32 x i16> %c, <32 x i16> undef, <32 x i32> zeroinitializer
+  store <32 x i16> %d, <32 x i16>* %x
+  ret void
+}
+
+define void @gather_const_v16i32(<16 x i32>* %x) {
+; LMULMAX4-LABEL: gather_const_v16i32:
+; LMULMAX4:       # %bb.0:
+; LMULMAX4-NEXT:    addi a1, zero, 16
+; LMULMAX4-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
+; LMULMAX4-NEXT:    vle32.v v28, (a0)
+; LMULMAX4-NEXT:    vrgather.vi v8, v28, 9
+; LMULMAX4-NEXT:    vse32.v v8, (a0)
+; LMULMAX4-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v16i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 32
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-NEXT:    addi a2, a0, 16
+; LMULMAX1-NEXT:    addi a3, a0, 48
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 1
+; LMULMAX1-NEXT:    vse32.v v26, (a1)
+; LMULMAX1-NEXT:    vse32.v v26, (a3)
+; LMULMAX1-NEXT:    vse32.v v26, (a0)
+; LMULMAX1-NEXT:    vse32.v v26, (a2)
+; LMULMAX1-NEXT:    ret
+  %a = load <16 x i32>, <16 x i32>* %x
+  %b = extractelement <16 x i32> %a, i32 9
+  %c = insertelement <16 x i32> undef, i32 %b, i32 0
+  %d = shufflevector <16 x i32> %c, <16 x i32> undef, <16 x i32> zeroinitializer
+  store <16 x i32> %d, <16 x i32>* %x
+  ret void
+}
+
+define void @gather_const_v8i64(<8 x i64>* %x) {
+; LMULMAX4-LABEL: gather_const_v8i64:
+; LMULMAX4:       # %bb.0:
+; LMULMAX4-NEXT:    addi a1, zero, 8
+; LMULMAX4-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
+; LMULMAX4-NEXT:    vle64.v v28, (a0)
+; LMULMAX4-NEXT:    vrgather.vi v8, v28, 3
+; LMULMAX4-NEXT:    vse64.v v8, (a0)
+; LMULMAX4-NEXT:    ret
+;
+; LMULMAX1-LABEL: gather_const_v8i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    addi a2, zero, 2
+; LMULMAX1-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-NEXT:    addi a2, a0, 48
+; LMULMAX1-NEXT:    addi a3, a0, 32
+; LMULMAX1-NEXT:    vrgather.vi v26, v25, 1
+; LMULMAX1-NEXT:    vse64.v v26, (a3)
+; LMULMAX1-NEXT:    vse64.v v26, (a2)
+; LMULMAX1-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-NEXT:    vse64.v v26, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <8 x i64>, <8 x i64>* %x
+  %b = extractelement <8 x i64> %a, i32 3
+  %c = insertelement <8 x i64> undef, i64 %b, i32 0
+  %d = shufflevector <8 x i64> %c, <8 x i64> undef, <8 x i32> zeroinitializer
+  store <8 x i64> %d, <8 x i64>* %x
+  ret void
+}