diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3504,6 +3504,10 @@
       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
     }
   }
+  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
+  // even if it might be sign extended.
+  if (Hi.isUndef())
+    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
 
   // Fall back to a stack store and stride x0 vector load.
   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
@@ -6690,6 +6694,12 @@
     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
                        DAG.getRegister(RISCV::X0, MVT::i32));
 
+  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
+  // even if it might be sign extended.
+  if (Hi.isUndef())
+    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
+                       DAG.getRegister(RISCV::X0, MVT::i32));
+
   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
                      DAG.getUNDEF(VecVT), Lo, Hi,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll
@@ -916,27 +916,19 @@
 define <1 x i64> @vrol_vx_v1i64(<1 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vrol_vx_v1i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
-; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v10, 0
+; CHECK-RV32-NEXT: vmv.v.i v9, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v11, v10, v9
-; CHECK-RV32-NEXT: li a0, 63
+; CHECK-RV32-NEXT: vsub.vx v10, v9, a0
+; CHECK-RV32-NEXT: li a1, 63
 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
-; CHECK-RV32-NEXT: vmv.s.x v10, a0
+; CHECK-RV32-NEXT: vmv.s.x v9, a1
 ; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vand.vv v11, v11, v10
-; CHECK-RV32-NEXT: vsrl.vv v11, v8, v11
-; CHECK-RV32-NEXT: vand.vv v9, v9, v10
+; CHECK-RV32-NEXT: vand.vv v10, v10, v9
+; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
+; CHECK-RV32-NEXT: vand.vx v9, v9, a0
 ; CHECK-RV32-NEXT: vsll.vv v8, v8, v9
-; CHECK-RV32-NEXT: vor.vv v8, v8, v11
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vrol_vx_v1i64:
@@ -952,24 +944,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vrol_vx_v1i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v9
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vrol_vx_v1i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vrol_vx_v1i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <1 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <1 x i64> %b.head, <1 x i64> poison, <1 x i32> zeroinitializer
 %x = call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b.splat)
@@ -1018,24 +997,18 @@
 define <2 x i64> @vrol_vx_v2i64(<2 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vrol_vx_v2i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v10, v9, a0
-; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
+; CHECK-RV32-NEXT: vmv.v.x v9, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v9, v9, a1
+; CHECK-RV32-NEXT: vsll.vv v9, v8, v9
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v11, 0
+; CHECK-RV32-NEXT: vmv.v.i v10, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v9, v11, v9
-; CHECK-RV32-NEXT: vand.vx v9, v9, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9
-; CHECK-RV32-NEXT: vor.vv v8, v10, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v10, v10, a0
+; CHECK-RV32-NEXT: vand.vx v10, v10, a1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
+; CHECK-RV32-NEXT: vor.vv v8, v9, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vrol_vx_v2i64:
@@ -1051,24 +1024,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vrol_vx_v2i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v9
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vrol_vx_v2i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vrol_vx_v2i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <2 x i64> %b.head, <2 x i64> poison, <2 x i32> zeroinitializer
 %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b.splat)
@@ -1117,24 +1077,18 @@
 define <4 x i64> @vrol_vx_v4i64(<4 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vrol_vx_v4i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v12, v10, a0
-; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v10, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v10, v10, a1
+; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v14, 0
+; CHECK-RV32-NEXT: vmv.v.i v12, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v10, v14, v10
-; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v12, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v12, v12, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vrol_vx_v4i64:
@@ -1150,24 +1104,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v12, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vrol_vx_v4i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v10, (a0), zero
-; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v10
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vrol_vx_v4i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vrol_vx_v4i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <4 x i64> %b.head, <4 x i64> poison, <4 x i32> zeroinitializer
 %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b.splat)
@@ -1216,24 +1157,18 @@
 define <8 x i64> @vrol_vx_v8i64(<8 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vrol_vx_v8i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v16, v12, a0
-; CHECK-RV32-NEXT: vsll.vv v16, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v12, v12, a1
+; CHECK-RV32-NEXT: vsll.vv v12, v8, v12
 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v20, 0
+; CHECK-RV32-NEXT: vmv.v.i v16, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v12, v20, v12
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v16, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v16, v16, a0
+; CHECK-RV32-NEXT: vand.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v12, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vrol_vx_v8i64:
@@ -1249,27 +1184,17 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v16, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vrol_vx_v8i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v12, (a0), zero
-; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v12
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vrol_vx_v8i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vrol_vx_v8i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <8 x i64> %b.head, <8 x i64> poison, <8 x i32> zeroinitializer
 %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b.splat)
 ret <8 x i64> %x
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ZVBB32: {{.*}}
+; CHECK-ZVBB64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
@@ -1595,27 +1595,19 @@
 define <1 x i64> @vror_vx_v1i64(<1 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vror_vx_v1i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
-; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v10, 0
+; CHECK-RV32-NEXT: vmv.v.i v9, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v11, v10, v9
-; CHECK-RV32-NEXT: li a0, 63
+; CHECK-RV32-NEXT: vsub.vx v10, v9, a0
+; CHECK-RV32-NEXT: li a1, 63
 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
-; CHECK-RV32-NEXT: vmv.s.x v10, a0
+; CHECK-RV32-NEXT: vmv.s.x v9, a1
 ; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vand.vv v11, v11, v10
-; CHECK-RV32-NEXT: vsll.vv v11, v8, v11
-; CHECK-RV32-NEXT: vand.vv v9, v9, v10
+; CHECK-RV32-NEXT: vand.vv v10, v10, v9
+; CHECK-RV32-NEXT: vsll.vv v10, v8, v10
+; CHECK-RV32-NEXT: vand.vx v9, v9, a0
 ; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9
-; CHECK-RV32-NEXT: vor.vv v8, v8, v11
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vor.vv v8, v8, v10
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vx_v1i64:
@@ -1631,24 +1623,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vror_vx_v1i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v9
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vror_vx_v1i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vror_vx_v1i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <1 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <1 x i64> %b.head, <1 x i64> poison, <1 x i32> zeroinitializer
 %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b.splat)
@@ -1772,24 +1751,18 @@
 define <2 x i64> @vror_vx_v2i64(<2 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vror_vx_v2i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v10, v9, a0
-; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
+; CHECK-RV32-NEXT: vmv.v.x v9, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v9, v9, a1
+; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v11, 0
+; CHECK-RV32-NEXT: vmv.v.i v10, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v9, v11, v9
-; CHECK-RV32-NEXT: vand.vx v9, v9, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v9
-; CHECK-RV32-NEXT: vor.vv v8, v10, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v10, v10, a0
+; CHECK-RV32-NEXT: vand.vx v10, v10, a1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
+; CHECK-RV32-NEXT: vor.vv v8, v9, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vx_v2i64:
@@ -1805,24 +1778,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vror_vx_v2i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero
-; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v9
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vror_vx_v2i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vror_vx_v2i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <2 x i64> %b.head, <2 x i64> poison, <2 x i32> zeroinitializer
 %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b.splat)
@@ -1944,24 +1904,18 @@
 define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vror_vx_v4i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v12, v10, a0
-; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
+; CHECK-RV32-NEXT: vmv.v.x v10, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v10, v10, a1
+; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v14, 0
+; CHECK-RV32-NEXT: vmv.v.i v12, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v10, v14, v10
-; CHECK-RV32-NEXT: vand.vx v10, v10, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v10
-; CHECK-RV32-NEXT: vor.vv v8, v12, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v12, v12, a0
+; CHECK-RV32-NEXT: vand.vx v12, v12, a1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
+; CHECK-RV32-NEXT: vor.vv v8, v10, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vx_v4i64:
@@ -1977,24 +1931,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v12, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vror_vx_v4i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v10, (a0), zero
-; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v10
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vror_vx_v4i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vror_vx_v4i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <4 x i64> %b.head, <4 x i64> poison, <4 x i32> zeroinitializer
 %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b.splat)
@@ -2116,24 +2057,18 @@
 define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) {
 ; CHECK-RV32-LABEL: vror_vx_v8i64:
 ; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: sw a0, 12(sp)
-; CHECK-RV32-NEXT: sw a0, 8(sp)
-; CHECK-RV32-NEXT: addi a0, sp, 8
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero
-; CHECK-RV32-NEXT: li a0, 63
-; CHECK-RV32-NEXT: vand.vx v16, v12, a0
-; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16
+; CHECK-RV32-NEXT: vmv.v.x v12, a0
+; CHECK-RV32-NEXT: li a1, 63
+; CHECK-RV32-NEXT: vand.vx v12, v12, a1
+; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12
 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-RV32-NEXT: vmv.v.i v20, 0
+; CHECK-RV32-NEXT: vmv.v.i v16, 0
 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-RV32-NEXT: vsub.vv v12, v20, v12
-; CHECK-RV32-NEXT: vand.vx v12, v12, a0
-; CHECK-RV32-NEXT: vsll.vv v8, v8, v12
-; CHECK-RV32-NEXT: vor.vv v8, v16, v8
-; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: vsub.vx v16, v16, a0
+; CHECK-RV32-NEXT: vand.vx v16, v16, a1
+; CHECK-RV32-NEXT: vsll.vv v8, v8, v16
+; CHECK-RV32-NEXT: vor.vv v8, v12, v8
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: vror_vx_v8i64:
@@ -2149,24 +2084,11 @@
 ; CHECK-RV64-NEXT: vor.vv v8, v16, v8
 ; CHECK-RV64-NEXT: ret
 ;
-; CHECK-ZVBB32-LABEL: vror_vx_v8i64:
-; CHECK-ZVBB32: # %bb.0:
-; CHECK-ZVBB32-NEXT: addi sp, sp, -16
-; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-ZVBB32-NEXT: sw a0, 12(sp)
-; CHECK-ZVBB32-NEXT: sw a0, 8(sp)
-; CHECK-ZVBB32-NEXT: addi a0, sp, 8
-; CHECK-ZVBB32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-ZVBB32-NEXT: vlse64.v v12, (a0), zero
-; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v12
-; CHECK-ZVBB32-NEXT: addi sp, sp, 16
-; CHECK-ZVBB32-NEXT: ret
-;
-; CHECK-ZVBB64-LABEL: vror_vx_v8i64:
-; CHECK-ZVBB64: # %bb.0:
-; CHECK-ZVBB64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0
-; CHECK-ZVBB64-NEXT: ret
+; CHECK-ZVBB-LABEL: vror_vx_v8i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: ret
 %b.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %b.splat = shufflevector <8 x i64> %b.head, <8 x i64> poison, <8 x i32> zeroinitializer
 %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b.splat)
@@ -2245,3 +2167,6 @@
 ret <8 x i64> %x
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ZVBB32: {{.*}}
+; CHECK-ZVBB64: {{.*}}