diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -296,9 +296,14 @@
   Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
 
   if (IsStridedOrIndexed) {
-    Operands.push_back(Node->getOperand(CurOp++)); // Index.
+    SDValue StrideOrIndex = Node->getOperand(CurOp++);
+    if (IndexVT && StrideOrIndex.hasOneUse() &&
+        (StrideOrIndex.getOpcode() == ISD::SIGN_EXTEND ||
+         StrideOrIndex.getOpcode() == ISD::ZERO_EXTEND))
+      StrideOrIndex = StrideOrIndex.getOperand(0);
+    Operands.push_back(StrideOrIndex); // Index.
     if (IndexVT)
-      *IndexVT = Operands.back()->getSimpleValueType(0);
+      *IndexVT = StrideOrIndex->getSimpleValueType(0);
   }
 
   if (IsMasked) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -247,23 +247,12 @@
 }
 
 define <vscale x 8 x i8> @mgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
-; RV32-LABEL: mgather_baseidx_nxv8i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32-NEXT:    vluxei32.v v9, (a0), v12, v0.t
-; RV32-NEXT:    vmv.v.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mgather_baseidx_nxv8i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
-; RV64-NEXT:    vmv.v.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: mgather_baseidx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vluxei8.v v9, (a0), v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
   ret <vscale x 8 x i8> %v
@@ -2171,27 +2160,21 @@
 define <vscale x 16 x i8> @mgather_baseidx_nxv16i8(ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru) {
 ; RV32-LABEL: mgather_baseidx_nxv16i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v16, v8
-; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
+; RV32-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; RV32-NEXT:    vluxei8.v v10, (a0), v8, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_baseidx_nxv16i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v10, (a0), v8, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    srli a1, a1, 3
 ; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v0, a1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v11, (a0), v9, v0.t
 ; RV64-NEXT:    vmv2r.v v8, v10
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 16 x i8> %idxs
@@ -2204,49 +2187,37 @@
 define <vscale x 32 x i8> @mgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru) {
 ; RV32-LABEL: mgather_baseidx_nxv32i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v16, v8
-; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
+; RV32-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; RV32-NEXT:    vluxei8.v v12, (a0), v8, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vslidedown.vx v0, v0, a1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v16, v10
-; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32-NEXT:    vluxei32.v v14, (a0), v16, v0.t
+; RV32-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; RV32-NEXT:    vluxei8.v v14, (a0), v10, v0.t
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_baseidx_nxv32i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vmv1r.v v16, v0
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v24, v8
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v12, (a0), v24, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v12, (a0), v8, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    srli a2, a1, 3
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v0, a2
-; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v24, v9
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v13, (a0), v24, v0.t
+; RV64-NEXT:    vsetvli a3, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v13, (a0), v9, v0.t
 ; RV64-NEXT:    srli a1, a1, 2
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v16, a1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v10
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v14, (a0), v16, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v14, (a0), v10, v0.t
 ; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v0, a2
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v11
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT:    vluxei64.v v15, (a0), v16, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; RV64-NEXT:    vluxei8.v v15, (a0), v11, v0.t
 ; RV64-NEXT:    vmv4r.v v8, v12
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -172,21 +172,11 @@
 }
 
 define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
-; RV32-LABEL: mscatter_baseidx_nxv8i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsext.vf4 v12, v9
-; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mscatter_baseidx_nxv8i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: mscatter_baseidx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t
+; CHECK-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
   call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
   ret void
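For reference, the masked gather/scatter cases above reduce to the following standalone IR, a minimal sketch with an illustrative function name (not taken from the test files). Before this change, llc -mtriple=riscv64 -mattr=+v widened the i8 indices with vsext.vf8 and selected vluxei64.v; with it, the extend is folded and vluxei8.v is selected, as the CHECK lines above show:

declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)

define <vscale x 8 x i8> @gather_i8_indices(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
  ; The GEP's i8 indices are extended to pointer width during DAG
  ; construction; that extend is what the ISel change looks through
  ; (only when the extend has a single use).
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
}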
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
 
@@ -231,21 +231,11 @@
 }
 
 define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpgather_baseidx_nxv8i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsext.vf4 v12, v8
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vpgather_baseidx_nxv8i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v8
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vpgather_baseidx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vluxei8.v v8, (a0), v8, v0.t
+; CHECK-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
   %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
@@ -266,19 +256,15 @@
 ; RV32-NEXT:    srli a3, a3, 2
 ; RV32-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vslidedown.vx v0, v0, a3
-; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v16, v10
 ; RV32-NEXT:    vsetvli zero, a4, e8, m2, ta, ma
-; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
+; RV32-NEXT:    vluxei8.v v10, (a0), v10, v0.t
 ; RV32-NEXT:    bltu a1, a2, .LBB12_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
 ; RV32-NEXT:  .LBB12_2:
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v12
-; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    vluxei8.v v8, (a0), v8, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vpgather_baseidx_nxv32i8:
@@ -298,11 +284,9 @@
 ; RV64-NEXT:    srli a6, a2, 2
 ; RV64-NEXT:    vsetvli a7, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vslidedown.vx v12, v13, a6
-; RV64-NEXT:    vsetvli a6, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v10
 ; RV64-NEXT:    vsetvli zero, a5, e8, m1, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v12
-; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vluxei8.v v10, (a0), v10, v0.t
 ; RV64-NEXT:    bltu a1, a4, .LBB12_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    mv a1, a4
@@ -314,29 +298,23 @@
 ; RV64-NEXT:    srli a4, a2, 3
 ; RV64-NEXT:    vsetvli a6, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v13, a4
-; RV64-NEXT:    vsetvli a6, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v9
 ; RV64-NEXT:    vsetvli zero, a5, e8, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
+; RV64-NEXT:    vluxei8.v v9, (a0), v9, v0.t
 ; RV64-NEXT:    bltu a1, a2, .LBB12_6
 ; RV64-NEXT:  # %bb.5:
 ; RV64-NEXT:    mv a1, a2
 ; RV64-NEXT:  .LBB12_6:
-; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v13
-; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    vluxei8.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    sub a1, a3, a2
 ; RV64-NEXT:    sltu a2, a3, a1
 ; RV64-NEXT:    addi a2, a2, -1
 ; RV64-NEXT:    and a1, a2, a1
 ; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV64-NEXT:    vslidedown.vx v0, v12, a4
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v11
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
+; RV64-NEXT:    vluxei8.v v11, (a0), v11, v0.t
 ; RV64-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
   %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
 
@@ -170,21 +170,11 @@
 }
 
 define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpscatter_baseidx_nxv8i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsext.vf4 v12, v9
-; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vpscatter_baseidx_nxv8i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vsext.vf8 v16, v9
-; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
-; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vpscatter_baseidx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsoxei8.v v8, (a0), v9, v0.t
+; CHECK-NEXT:    ret
   %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
   call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret void
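The same fold applies to the VP intrinsics; a minimal sketch for the scatter side (function name illustrative, mirroring vpscatter_baseidx_nxv8i8 above), which with this change selects vsoxei8.v under llc -mtriple=riscv64 -mattr=+v,+m instead of vsext.vf8 followed by vsoxei64.v:

declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @scatter_i8_indices(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
  ; As in the gather sketch, the i8 indices feed an extend created while
  ; lowering the GEP, which ISel now folds into the indexed store's EEW.
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}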