diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4947,7 +4947,7 @@
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
   } else {
     // On RV32, i64-element vectors must be specially handled to place the
-    // value at element 0, by using two vslide1up instructions in sequence on
+    // value at element 0, by using two vslide1down instructions in sequence on
     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
     // this.
     SDValue One = DAG.getConstant(1, DL, XLenVT);
@@ -4959,16 +4959,14 @@
         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
     // Limit the active VL to two.
     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
-    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
-    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
-    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
-    // First slide in the hi value, then the lo in underneath it.
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
+    // First slide in the lo value, then the hi in above it. We use slide1down
+    // to avoid the register group overlap constraint of vslide1up.
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT),
+                           DAG.getUNDEF(I32ContainerVT), ValLo,
                            I32Mask, InsertI64VL);
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                            I32Mask, InsertI64VL);
     // Bitcast back to the right container type.
     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -508,11 +508,10 @@
 ; RV32-LABEL: bitcast_i64_v4i16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vslide1up.vx v9, v8, a1
-; RV32-NEXT: vslide1up.vx v10, v9, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 0
+; RV32-NEXT: vslideup.vi v8, v9, 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v4i16:
@@ -547,11 +546,10 @@
 ; RV32-LABEL: bitcast_i64_v2i32:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vslide1up.vx v9, v8, a1
-; RV32-NEXT: vslide1up.vx v10, v9, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 0
+; RV32-NEXT: vslideup.vi v8, v9, 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v2i32:
@@ -586,11 +584,10 @@
 ; RV32-LABEL: bitcast_i64_v1i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vslide1up.vx v9, v8, a1
-; RV32-NEXT: vslide1up.vx v10, v9, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 0
+; RV32-NEXT: vslideup.vi v8, v9, 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bitcast_i64_v1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -199,11 +199,10 @@
 ; RV32-FP-LABEL: bitcast_i64_v4f16:
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT: vmv.v.i v8, 0
-; RV32-FP-NEXT: vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT: vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v10, 0
+; RV32-FP-NEXT: vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v4f16:
@@ -219,11 +218,10 @@
 ; RV32-FP-LABEL: bitcast_i64_v2f32:
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT: vmv.v.i v8, 0
-; RV32-FP-NEXT: vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT: vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v10, 0
+; RV32-FP-NEXT: vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v2f32:
@@ -239,11 +237,10 @@
 ; RV32-FP-LABEL: bitcast_i64_v1f64:
 ; RV32-FP: # %bb.0:
 ; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT: vmv.v.i v8, 0
-; RV32-FP-NEXT: vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT: vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT: vslideup.vi v8, v10, 0
+; RV32-FP-NEXT: vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT: ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v1f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -11,9 +11,8 @@
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vslide1up.vx v12, v10, a2
-; RV32-NEXT: vslide1up.vx v10, v12, a1
+; RV32-NEXT: vslide1down.vx v10, v8, a1
+; RV32-NEXT: vslide1down.vx v10, v10, a2
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, ma
 ; RV32-NEXT: vslideup.vi v8, v10, 3
 ; RV32-NEXT: vse64.v v8, (a0)
@@ -52,9 +51,8 @@
 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
 ; RV32-NEXT: vslideup.vi v8, v10, 2
 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vslide1up.vx v12, v10, a2
-; RV32-NEXT: vslide1up.vx v10, v12, a1
+; RV32-NEXT: vslide1down.vx v10, v8, a1
+; RV32-NEXT: vslide1down.vx v10, v10, a2
 ; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma
 ; RV32-NEXT: vslideup.vi v8, v10, 2
 ; RV32-NEXT: sw a1, 16(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -132,8 +132,6 @@
 ; RV32-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v0
 ; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
 ; RV32-NEXT: bnez a1, .LBB5_3
 ; RV32-NEXT: # %bb.1: # %else
 ; RV32-NEXT: andi a0, a0, 2
@@ -142,26 +140,28 @@
 ; RV32-NEXT: vmv1r.v v8, v9
 ; RV32-NEXT: ret
 ; RV32-NEXT: .LBB5_3: # %cond.load
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
-; RV32-NEXT: vslide1up.vx v11, v10, a2
-; RV32-NEXT: vslide1up.vx v12, v11, a1
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vslide1down.vx v10, v10, a1
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v12, 0
+; RV32-NEXT: vslideup.vi v9, v10, 0
 ; RV32-NEXT: andi a0, a0, 2
 ; RV32-NEXT: beqz a0, .LBB5_2
 ; RV32-NEXT: .LBB5_4: # %cond.load1
 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; RV32-NEXT: vslidedown.vi v8, v8, 1
 ; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lw a1, 4(a0)
-; RV32-NEXT: lw a0, 0(a0)
+; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a0, 4(a0)
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v8, v10, a1
-; RV32-NEXT: vslide1up.vx v10, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v9, v8, 1
 ; RV32-NEXT: vmv1r.v v8, v9
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -636,9 +636,8 @@
 ; CHECK-LABEL: insertelt_nxv1i64_0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vslide1up.vx v10, v9, a1
-; CHECK-NEXT: vslide1up.vx v9, v10, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 0
 ; CHECK-NEXT: ret
@@ -650,9 +649,8 @@
 ; CHECK-LABEL: insertelt_nxv1i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vslide1up.vx v10, v9, a1
-; CHECK-NEXT: vslide1up.vx v9, v10, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 3
 ; CHECK-NEXT: ret
@@ -664,9 +662,8 @@
 ; CHECK-LABEL: insertelt_nxv1i64_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vslide1up.vx v10, v9, a1
-; CHECK-NEXT: vslide1up.vx v9, v10, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: addi a0, a2, 1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v9, a2
@@ -679,9 +676,8 @@
 ; CHECK-LABEL: insertelt_nxv2i64_0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vslide1up.vx v12, v10, a1
-; CHECK-NEXT: vslide1up.vx v10, v12, a0
+; CHECK-NEXT: vslide1down.vx v10, v8, a0
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
 ; CHECK-NEXT: vsetivli zero, 1, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 0
 ; CHECK-NEXT: ret
@@ -693,9 +689,8 @@
 ; CHECK-LABEL: insertelt_nxv2i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vslide1up.vx v12, v10, a1
-; CHECK-NEXT: vslide1up.vx v10, v12, a0
+; CHECK-NEXT: vslide1down.vx v10, v8, a0
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 3
 ; CHECK-NEXT: ret
@@ -707,9 +702,8 @@
 ; CHECK-LABEL: insertelt_nxv2i64_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vslide1up.vx v12, v10, a1
-; CHECK-NEXT: vslide1up.vx v10, v12, a0
+; CHECK-NEXT: vslide1down.vx v10, v8, a0
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
 ; CHECK-NEXT: addi a0, a2, 1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v10, a2
@@ -722,9 +716,8 @@
 ; CHECK-LABEL: insertelt_nxv4i64_0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vslide1up.vx v16, v12, a1
-; CHECK-NEXT: vslide1up.vx v12, v16, a0
+; CHECK-NEXT: vslide1down.vx v12, v8, a0
+; CHECK-NEXT: vslide1down.vx v12, v12, a1
 ; CHECK-NEXT: vsetivli zero, 1, e64, m4, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v12, 0
 ; CHECK-NEXT: ret
@@ -736,9 +729,8 @@
 ; CHECK-LABEL: insertelt_nxv4i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vslide1up.vx v16, v12, a1
-; CHECK-NEXT: vslide1up.vx v12, v16, a0
+; CHECK-NEXT: vslide1down.vx v12, v8, a0
+; CHECK-NEXT: vslide1down.vx v12, v12, a1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v12, 3
 ; CHECK-NEXT: ret
@@ -750,9 +742,8 @@
 ; CHECK-LABEL: insertelt_nxv4i64_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vslide1up.vx v16, v12, a1
-; CHECK-NEXT: vslide1up.vx v12, v16, a0
+; CHECK-NEXT: vslide1down.vx v12, v8, a0
+; CHECK-NEXT: vslide1down.vx v12, v12, a1
 ; CHECK-NEXT: addi a0, a2, 1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v12, a2
@@ -765,9 +756,8 @@
 ; CHECK-LABEL: insertelt_nxv8i64_0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vslide1up.vx v24, v16, a1
-; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vslide1down.vx v16, v8, a0
+; CHECK-NEXT: vslide1down.vx v16, v16, a1
 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v16, 0
 ; CHECK-NEXT: ret
@@ -779,9 +769,8 @@
 ; CHECK-LABEL: insertelt_nxv8i64_imm:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vslide1up.vx v24, v16, a1
-; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vslide1down.vx v16, v8, a0
+; CHECK-NEXT: vslide1down.vx v16, v16, a1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v16, 3
 ; CHECK-NEXT: ret
@@ -793,9 +782,8 @@
 ; CHECK-LABEL: insertelt_nxv8i64_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vslide1up.vx v24, v16, a1
-; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vslide1down.vx v16, v8, a0
+; CHECK-NEXT: vslide1down.vx v16, v16, a1
 ; CHECK-NEXT: addi a0, a2, 1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v16, a2
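Reviewer note (not part of the patch): a minimal sketch of the kind of IR that reaches this lowering path. The test bodies are outside the diff context above, so the exact signature below is an assumption based on the test name, not copied from the file. On RV32 an i64 scalar cannot be inserted directly, so it is split into lo/hi i32 halves and now materialized with the two vslide1down.vx instructions checked above, dropping the vmv.v.i splat that the vslide1up sequence needed.

; Hypothetical reconstruction of insertelt_nxv1i64_0 from insertelt-int-rv32.ll,
; compiled with something like: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs
define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt) {
  %r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
  ret <vscale x 1 x i64> %r
}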