diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7798,8 +7798,9 @@
 
     // Then perform the interleave
     //   v[0]   v[n]   v[1]   v[n+1]   v[2]   v[n+2]   v[3]   v[n+3] ...
+    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
     Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
-                              Concat, Idx, DAG.getUNDEF(ConcatVT), OddMask, VL);
+                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
   }
 
   // Extract the two halves from the interleaved result
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -80,7 +80,7 @@
 ; CHECK-NEXT: vsrl.vi v16, v12, 1
 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -173,44 +173,32 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 1
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vid.v v24
 ; CHECK-NEXT: vand.vi v26, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v26, 0
-; CHECK-NEXT: vsrl.vi v2, v24, 1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v2, v2, a0, v0.t
-; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: vmsne.vi v10, v26, 0
+; CHECK-NEXT: vsrl.vi v8, v24, 1
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv4r.v v28, v16
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v24, v8, v2, v0.t
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
 ; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v2, v0.t
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vmv.v.v v16, v24
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -310,7 +298,7 @@
 ; CHECK-NEXT: vsrl.vi v16, v12, 1
 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -363,44 +351,32 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 1
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vid.v v24
 ; CHECK-NEXT: vand.vi v26, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v26, 0
-; CHECK-NEXT: vsrl.vi v2, v24, 1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v2, v2, a0, v0.t
-; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: vmsne.vi v10, v26, 0
+; CHECK-NEXT: vsrl.vi v8, v24, 1
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv4r.v v28, v16
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v24, v8, v2, v0.t
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
 ; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v2, v0.t
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vmv.v.v v16, v24
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret