diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1826,14 +1826,13 @@
     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
     if (!IsSelect) {
-      bool IsLHS = MaskIndex < (int)NumElts;
-      // For "undef" elements of -1, shuffle in element 0 instead.
-      GatherIndicesLHS.push_back(
-          DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
-      // TODO: If we're masking out unused elements anyway, it might produce
-      // better code if we use the most-common element index instead of 0.
+      bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
+      GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
+                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
+                                     : DAG.getUNDEF(XLenVT));
       GatherIndicesRHS.push_back(
-          DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
+          IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
+                            : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64-unknown-unknown-elf"
+
+define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
+; CHECK-LABEL: interleave:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8m2 def $v8m2
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli zero, 8, e16, m2, tu, mu
+; CHECK-NEXT:    vmv2r.v v28, v26
+; CHECK-NEXT:    vslideup.vi v28, v8, 0
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v30, 0
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v28, v30, 8
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vrgather.vv v12, v28, v10
+; CHECK-NEXT:    vsetivli zero, 8, e16, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v26, v8, 0
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v26, v30, 8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrgather.vv v8, v12, v28
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addiw a0, a0, -1366
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_1)
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT:    vle16.v v28, (a0)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
+; CHECK-NEXT:    vrgather.vv v8, v26, v28, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32>
+  %v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32>
+  %v4 = shufflevector <16 x i16> %v2, <16 x i16> %v3, <16 x i32>
+  ret <16 x i16> %v4
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -63,28 +63,22 @@
 ; LMULMAX2-NEXT:    addi a0, zero, 2
 ; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; LMULMAX2-NEXT:    vmv.s.x v0, a0
-; LMULMAX2-NEXT:    lui a0, %hi(.LCPI1_0)
-; LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI1_0)
 ; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT:    vle32.v v27, (a0)
+; LMULMAX2-NEXT:    vmv.v.i v27, 3
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
 ; LMULMAX2-NEXT:    vrgather.vv v26, v9, v27, v0.t
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; LMULMAX2-NEXT:    vrgather.vv v27, v10, v25
+; LMULMAX2-NEXT:    vrgather.vv v28, v10, v25
 ; LMULMAX2-NEXT:    addi a0, zero, 8
 ; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; LMULMAX2-NEXT:    vmv.s.x v0, a0
-; LMULMAX2-NEXT:    lui a0, %hi(.LCPI1_1)
-; LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI1_1)
-; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT:    vle32.v v25, (a0)
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
-; LMULMAX2-NEXT:    vrgather.vv v27, v11, v25, v0.t
+; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
+; LMULMAX2-NEXT:    vrgather.vv v28, v11, v27, v0.t
 ; LMULMAX2-NEXT:    addi a0, zero, 3
 ; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; LMULMAX2-NEXT:    vmv.s.x v0, a0
 ; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT:    vmerge.vvm v8, v27, v26, v0
+; LMULMAX2-NEXT:    vmerge.vvm v8, v28, v26, v0
 ; LMULMAX2-NEXT:    ret
   %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32>
   ret <4 x float> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -133,43 +133,35 @@
 define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, zero, 1
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vmv.s.x v25, a0
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV32-NEXT:    vslideup.vi v28, v25, 3
 ; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI6_0)
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32-NEXT:    vle16.v v25, (a0)
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32-NEXT:    vrgatherei16.vv v26, v8, v25
 ; RV32-NEXT:    addi a0, zero, 8
 ; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, mu
-; RV32-NEXT:    vrgatherei16.vv v26, v10, v28, v0.t
+; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT:    vmv.v.i v25, 1
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV32-NEXT:    vrgatherei16.vv v26, v10, v25, v0.t
 ; RV32-NEXT:    vmv2r.v v8, v26
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a0, zero, 1
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vmv.s.x v26, a0
-; RV64-NEXT:    vmv.v.i v28, 0
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
-; RV64-NEXT:    vslideup.vi v28, v26, 3
 ; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
 ; RV64-NEXT:    addi a0, a0, %lo(.LCPI6_0)
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT:    vle64.v v30, (a0)
-; RV64-NEXT:    vrgather.vv v26, v8, v30
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vle64.v v28, (a0)
+; RV64-NEXT:    vrgather.vv v26, v8, v28
 ; RV64-NEXT:    addi a0, zero, 8
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, tu, mu
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vmv.v.i v28, 1
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
 ; RV64-NEXT:    vrgather.vv v26, v10, v28, v0.t
 ; RV64-NEXT:    vmv2r.v v8, v26
 ; RV64-NEXT:    ret
@@ -185,28 +177,31 @@
 ; RV32-NEXT:    vmv.s.x v0, a0
 ; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vle16.v v25, (a0)
-; RV32-NEXT:    lui a0, %hi(.LCPI7_1)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_1)
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV32-NEXT:    lui a0, 16
+; RV32-NEXT:    addi a0, a0, 2
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, mu
 ; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
 ; RV32-NEXT:    vmv2r.v v8, v26
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_shuffle_xv_v4f64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi a0, zero, 2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vmv.s.x v26, a0
+; RV64-NEXT:    vmv.v.i v28, 1
+; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, mu
+; RV64-NEXT:    vslideup.vi v28, v26, 2
 ; RV64-NEXT:    addi a0, zero, 12
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT:    vmv.s.x v0, a0
 ; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
 ; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_0)
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vle64.v v28, (a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI7_1)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_1)
 ; RV64-NEXT:    vlse64.v v26, (a0), zero
 ; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
 ; RV64-NEXT:    vrgather.vv v26, v8, v28, v0.t
@@ -220,30 +215,27 @@
 ; RV32-LABEL: vrgather_shuffle_vx_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    addi a0, zero, 3
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vmv.s.x v25, a0
-; RV32-NEXT:    vmv.v.i v28, 0
-; RV32-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
-; RV32-NEXT:    vslideup.vi v28, v25, 1
 ; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a0
 ; RV32-NEXT:    lui a0, %hi(.LCPI8_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI8_0)
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
-; RV32-NEXT:    vrgatherei16.vv v26, v8, v28, v0.t
+; RV32-NEXT:    lui a0, 48
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v25, a0
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, mu
+; RV32-NEXT:    vrgatherei16.vv v26, v8, v25, v0.t
 ; RV32-NEXT:    vmv2r.v v8, v26
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_shuffle_vx_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a0, zero, 3
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vmv.s.x v26, a0
-; RV64-NEXT:    vmv.v.i v28, 0
-; RV64-NEXT:    vsetivli zero, 2, e64, m2, tu, mu
-; RV64-NEXT:    vslideup.vi v28, v26, 1
+; RV64-NEXT:    vmv.v.i v28, 3
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV64-NEXT:    vmv.s.x v28, zero
+; RV64-NEXT:    addi a0, zero, 3
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT:    vmv.s.x v0, a0
 ; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -85,21 +85,17 @@
 define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a0, zero, 1
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vmv.s.x v25, a0
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v26, v25, 3
 ; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vle16.v v27, (a0)
-; CHECK-NEXT:    vrgather.vv v25, v8, v27
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v26, (a0)
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
 ; CHECK-NEXT:    addi a0, zero, 8
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, mu
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v26, 1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; CHECK-NEXT:    vrgather.vv v25, v9, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    ret
@@ -108,20 +104,37 @@
 }
 
 define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a0, zero, 12
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vle16.v v26, (a0)
-; CHECK-NEXT:    vmv.v.i v25, 5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; CHECK-NEXT:    vrgather.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v25
-; CHECK-NEXT:    ret
+; RV32-LABEL: vrgather_shuffle_xv_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a0, zero, 12
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    lui a0, 16
+; RV32-NEXT:    addi a0, a0, 2
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vmv.v.x v26, a0
+; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT:    vmv.v.i v25, 5
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV32-NEXT:    vrgather.vv v25, v8, v26, v0.t
+; RV32-NEXT:    vmv1r.v v8, v25
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vrgather_shuffle_xv_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a0, zero, 12
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    lui a0, 16
+; RV64-NEXT:    addiw a0, a0, 2
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vmv.v.x v26, a0
+; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV64-NEXT:    vmv.v.i v25, 5
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV64-NEXT:    vrgather.vv v25, v8, v26, v0.t
+; RV64-NEXT:    vmv1r.v v8, v25
+; RV64-NEXT:    ret
   %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x i32>
   ret <4 x i16> %s
 }
@@ -130,13 +143,11 @@
 ; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, zero, 3
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vmv.s.x v25, a0
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v26, v25, 1
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    lui a0, 48
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv.v.i v25, 5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
@@ -203,12 +214,7 @@
 ; RV32-NEXT:    addi a0, zero, 5
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; RV32-NEXT:    vmv.s.x v25, a0
-; RV32-NEXT:    addi a0, zero, 36
-; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; RV32-NEXT:    vmv.v.i v26, 0
-; RV32-NEXT:    vmerge.vim v26, v26, 2, v0
+; RV32-NEXT:    vmv.v.i v26, 2
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
 ; RV32-NEXT:    vslideup.vi v26, v25, 7
 ; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
@@ -230,12 +236,7 @@
 ; RV64-NEXT:    addi a0, zero, 5
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vmv.s.x v28, a0
-; RV64-NEXT:    addi a0, zero, 36
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vmv.v.i v16, 0
-; RV64-NEXT:    vmerge.vim v16, v16, 2, v0
+; RV64-NEXT:    vmv.v.i v16, 2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, tu, mu
 ; RV64-NEXT:    vslideup.vi v16, v28, 7
 ; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
@@ -257,16 +258,6 @@
 define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
 ; RV32-LABEL: vrgather_shuffle_xv_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, zero, 6
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; RV32-NEXT:    vmv.s.x v25, a0
-; RV32-NEXT:    addi a0, zero, 4
-; RV32-NEXT:    vmv.s.x v26, a0
-; RV32-NEXT:    vmv.v.i v27, 0
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v27, v26, 5
-; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v27, v25, 6
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
@@ -277,30 +268,27 @@
 ; RV32-NEXT:    addi a0, zero, 113
 ; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, tu, mu
-; RV32-NEXT:    vrgatherei16.vv v28, v8, v27, v0.t
+; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_1)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT:    vle16.v v25, (a0)
+; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, mu
+; RV32-NEXT:    vrgatherei16.vv v28, v8, v25, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v28
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_shuffle_xv_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a0, zero, 6
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vmv.s.x v28, a0
-; RV64-NEXT:    addi a0, zero, 4
-; RV64-NEXT:    vmv.s.x v12, a0
-; RV64-NEXT:    vmv.v.i v16, 0
-; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, mu
-; RV64-NEXT:    vslideup.vi v16, v12, 5
-; RV64-NEXT:    vsetivli zero, 7, e64, m4, tu, mu
-; RV64-NEXT:    vslideup.vi v16, v28, 6
 ; RV64-NEXT:    addi a0, zero, 113
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
+; RV64-NEXT:    addi a0, a0, %lo(.LCPI12_0)
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT:    vle64.v v12, (a0)
 ; RV64-NEXT:    vmv.v.i v28, -1
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, tu, mu
-; RV64-NEXT:    vrgather.vv v28, v8, v16, v0.t
+; RV64-NEXT:    vrgather.vv v28, v8, v12, v0.t
 ; RV64-NEXT:    vmv4r.v v8, v28
 ; RV64-NEXT:    ret
   %s = shufflevector <8 x i64> , <8 x i64> %x, <8 x i32>
@@ -355,11 +343,10 @@
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT:    vrgather.vi v25, v8, 1
-; CHECK-NEXT:    addi a1, zero, 1
-; CHECK-NEXT:    vmv.s.x v26, a1
-; CHECK-NEXT:    vmv.v.i v27, 0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vi v27, v26, 3
+; CHECK-NEXT:    vmv.s.x v26, zero
+; CHECK-NEXT:    vmv.v.i v27, 1
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vi v27, v26, 1
 ; CHECK-NEXT:    addi a1, zero, 10
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -28,10 +28,8 @@
 ; RV64-1024-NEXT:    vslideup.vi v28, v8, 0
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
 ; RV64-1024-NEXT:    vslideup.vx v28, v12, a3
-; RV64-1024-NEXT:    lui a2, %hi(.LCPI0_1)
-; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI0_1)
 ; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT:    vle16.v v12, (a2)
+; RV64-1024-NEXT:    vid.v v12
 ; RV64-1024-NEXT:    vrgather.vv v8, v24, v12
 ; RV64-1024-NEXT:    lui a2, 1026731
 ; RV64-1024-NEXT:    addiw a2, a2, -1365
@@ -50,8 +48,8 @@
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 2
 ; RV64-1024-NEXT:    vsetivli zero, 4, e64, m1, tu, mu
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 3
-; RV64-1024-NEXT:    lui a2, %hi(.LCPI0_2)
-; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI0_2)
+; RV64-1024-NEXT:    lui a2, %hi(.LCPI0_1)
+; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI0_1)
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; RV64-1024-NEXT:    vle16.v v12, (a2)
 ; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, tu, mu
@@ -85,10 +83,8 @@
 ; RV64-2048-NEXT:    vslideup.vi v26, v30, 0
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
 ; RV64-2048-NEXT:    vslideup.vx v26, v28, a3
-; RV64-2048-NEXT:    lui a2, %hi(.LCPI0_1)
-; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI0_1)
 ; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT:    vle16.v v28, (a2)
+; RV64-2048-NEXT:    vid.v v28
 ; RV64-2048-NEXT:    vrgather.vv v30, v12, v28
 ; RV64-2048-NEXT:    lui a2, 1026731
 ; RV64-2048-NEXT:    addiw a2, a2, -1365
@@ -107,8 +103,8 @@
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 2
 ; RV64-2048-NEXT:    vsetivli zero, 4, e64, m1, tu, mu
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 3
-; RV64-2048-NEXT:    lui a2, %hi(.LCPI0_2)
-; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI0_2)
+; RV64-2048-NEXT:    lui a2, %hi(.LCPI0_1)
+; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI0_1)
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; RV64-2048-NEXT:    vle16.v v28, (a2)
 ; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
@@ -177,10 +173,8 @@
 ; RV64-1024-NEXT:    addi a2, sp, 16
 ; RV64-1024-NEXT:    vl8re8.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-1024-NEXT:    vslideup.vx v8, v16, a3
-; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_1)
-; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_1)
 ; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
-; RV64-1024-NEXT:    vle16.v v24, (a2)
+; RV64-1024-NEXT:    vid.v v24
 ; RV64-1024-NEXT:    csrr a2, vlenb
 ; RV64-1024-NEXT:    slli a2, a2, 3
 ; RV64-1024-NEXT:    add a2, sp, a2
@@ -212,8 +206,8 @@
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 6
 ; RV64-1024-NEXT:    vsetivli zero, 8, e64, m1, tu, mu
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 7
-; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_2)
-; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_2)
+; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_1)
+; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_1)
 ; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
 ; RV64-1024-NEXT:    vle16.v v24, (a2)
 ; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, tu, mu
@@ -252,10 +246,8 @@
 ; RV64-2048-NEXT:    vslideup.vi v28, v8, 0
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
 ; RV64-2048-NEXT:    vslideup.vx v28, v12, a3
-; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_1)
-; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_1)
 ; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64-2048-NEXT:    vle16.v v12, (a2)
+; RV64-2048-NEXT:    vid.v v12
 ; RV64-2048-NEXT:    vrgather.vv v8, v24, v12
 ; RV64-2048-NEXT:    lui a2, 1026731
 ; RV64-2048-NEXT:    addiw a2, a2, -1365
@@ -282,8 +274,8 @@
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 6
 ; RV64-2048-NEXT:    vsetivli zero, 8, e64, m1, tu, mu
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 7
-; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_2)
-; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_2)
+; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_1)
+; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_1)
 ; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; RV64-2048-NEXT:    vle16.v v12, (a2)
 ; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, tu, mu
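
Note, not part of the patch: the RISCVISelLowering.cpp hunk at the top is the functional change. When a two-source shuffle is lowered as a pair of masked vrgathers, the gather index for a lane that is taken from the other operand (or is undef in the shuffle mask) is now emitted as undef rather than forced to 0, so the index build_vectors are more often recognizable as splats or vid sequences instead of constant-pool loads, which is what the vmv.v.i/vid.v changes in the test updates above reflect. A minimal, hypothetical LLVM IR illustration of the affected shuffle shape follows; the function and value names are invented and the snippet is only a sketch, not one of the patch's tests.

; Hypothetical example, compiled the same way as the new test above, e.g.
;   llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128
; Each result lane reads from exactly one of %a or %b, so the gather index
; for the other operand in that lane is a don't-care value and may be undef.
define <4 x i32> @two_source_shuffle(<4 x i32> %a, <4 x i32> %b) {
  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}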