diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13664,6 +13664,24 @@
     if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
       return V;
     break;
+  case RISCVISD::ROTR_VL:
+  case RISCVISD::ROTL_VL: {
+    // An i16 bitrotate of 8 in either direction is equivalent to swapping
+    // the bytes (bswap). This is normally caught by a generic ISD::ROT{L,R}
+    // combine, but on fixed vectors they are legalized before they can be
+    // combined, so handle it here too.
+    EVT VT = N->getValueType(0);
+    if (VT.getScalarType() == MVT::i16 &&
+        // The splat of 8 will have been legalized to a vmv_v_x_vl.
+        N->getOperand(1).getOpcode() == RISCVISD::VMV_V_X_VL &&
+        N->getOperand(1).getOperand(0).isUndef() &&
+        isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+        N->getOperand(1).getConstantOperandVal(1) == 8) {
+      return DAG.getNode(RISCVISD::BSWAP_VL, SDLoc(N), VT, N->getOperand(0),
+                         N->getOperand(2), N->getOperand(3), N->getOperand(4));
+    }
+    break;
+  }
   case RISCVISD::VMV_V_X_VL: {
     // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
     // scalar input.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -169,13 +169,19 @@
 }
 
 define <2 x i8> @reverse_v2i8(<2 x i8> %a) {
-; CHECK-LABEL: reverse_v2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; NO-ZVBB-LABEL: reverse_v2i8:
+; NO-ZVBB: # %bb.0:
+; NO-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; NO-ZVBB-NEXT: vslidedown.vi v9, v8, 1
+; NO-ZVBB-NEXT: vslideup.vi v9, v8, 1
+; NO-ZVBB-NEXT: vmv1r.v v8, v9
+; NO-ZVBB-NEXT: ret
+;
+; ZVBB-LABEL: reverse_v2i8:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVBB-NEXT: vrev8.v v8, v8
+; ZVBB-NEXT: ret
   %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
   ret <2 x i8> %res
 }
@@ -258,13 +264,19 @@
 }
 
 define <2 x i16> @reverse_v2i16(<2 x i16> %a) {
-; CHECK-LABEL: reverse_v2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; NO-ZVBB-LABEL: reverse_v2i16:
+; NO-ZVBB: # %bb.0:
+; NO-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; NO-ZVBB-NEXT: vslidedown.vi v9, v8, 1
+; NO-ZVBB-NEXT: vslideup.vi v9, v8, 1
+; NO-ZVBB-NEXT: vmv1r.v v8, v9
+; NO-ZVBB-NEXT: ret
+;
+; ZVBB-LABEL: reverse_v2i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVBB-NEXT: vror.vi v8, v8, 16
+; ZVBB-NEXT: ret
   %res = call <2 x i16> @llvm.experimental.vector.reverse.v2i16(<2 x i16> %a)
   ret <2 x i16> %res
 }
@@ -332,13 +344,19 @@
 }
 
 define <2 x i32> @reverse_v2i32(<2 x i32> %a) {
-; CHECK-LABEL: reverse_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; NO-ZVBB-LABEL: reverse_v2i32:
+; NO-ZVBB: # %bb.0:
+; NO-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; NO-ZVBB-NEXT: vslidedown.vi v9, v8, 1
+; NO-ZVBB-NEXT: vslideup.vi v9, v8, 1
+; NO-ZVBB-NEXT: vmv1r.v v8, v9
+; NO-ZVBB-NEXT: ret
+;
+; ZVBB-LABEL: reverse_v2i32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVBB-NEXT: vror.vi v8, v8, 32
+; ZVBB-NEXT: ret
   %res = call <2 x i32> @llvm.experimental.vector.reverse.v2i32(<2 x i32> %a)
   ret <2 x i32> %res
 }
@@ -572,13 +590,19 @@
 }
 
 define <2 x half> @reverse_v2f16(<2 x half> %a) {
-; CHECK-LABEL: reverse_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; NO-ZVBB-LABEL: reverse_v2f16:
+; NO-ZVBB: # %bb.0:
+; NO-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; NO-ZVBB-NEXT: vslidedown.vi v9, v8, 1
+; NO-ZVBB-NEXT: vslideup.vi v9, v8, 1
+; NO-ZVBB-NEXT: vmv1r.v v8, v9
+; NO-ZVBB-NEXT: ret
+;
+; ZVBB-LABEL: reverse_v2f16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVBB-NEXT: vror.vi v8, v8, 16
+; ZVBB-NEXT: ret
   %res = call <2 x half> @llvm.experimental.vector.reverse.v2f16(<2 x half> %a)
   ret <2 x half> %res
 }
@@ -646,13 +670,19 @@
 }
 
 define <2 x float> @reverse_v2f32(<2 x float> %a) {
-; CHECK-LABEL: reverse_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; NO-ZVBB-LABEL: reverse_v2f32:
+; NO-ZVBB: # %bb.0:
+; NO-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; NO-ZVBB-NEXT: vslidedown.vi v9, v8, 1
+; NO-ZVBB-NEXT: vslideup.vi v9, v8, 1
+; NO-ZVBB-NEXT: vmv1r.v v8, v9
+; NO-ZVBB-NEXT: ret
+;
+; ZVBB-LABEL: reverse_v2f32:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVBB-NEXT: vror.vi v8, v8, 32
+; ZVBB-NEXT: ret
   %res = call <2 x float> @llvm.experimental.vector.reverse.v2f32(<2 x float> %a)
   ret <2 x float> %res
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -202,13 +202,13 @@
 ; ZVBB_V-LABEL: shuffle_v8i8_as_i16:
 ; ZVBB_V: # %bb.0:
 ; ZVBB_V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVBB_V-NEXT: vror.vi v8, v8, 8
+; ZVBB_V-NEXT: vrev8.v v8, v8
 ; ZVBB_V-NEXT: ret
 ;
 ; ZVBB_ZVE32X-LABEL: shuffle_v8i8_as_i16:
 ; ZVBB_ZVE32X: # %bb.0:
 ; ZVBB_ZVE32X-NEXT: vsetivli zero, 4, e16, m2, ta, ma
-; ZVBB_ZVE32X-NEXT: vror.vi v8, v8, 8
+; ZVBB_ZVE32X-NEXT: vrev8.v v8, v8
 ; ZVBB_ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x i8> %shuffle
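
Note (not part of the patch): the combine added above relies on the scalar identity that rotating an i16 by 8 bits, in either direction, is the same as swapping its two bytes, which is what vrev8.v does to each 16-bit element. Below is a minimal standalone C++ sketch that exhaustively checks this identity; the helper names rotl16, rotr16, and bswap16 are illustrative only and are not LLVM APIs.

#include <cassert>
#include <cstdint>

// Rotate a 16-bit value left by N bits (N taken modulo 16).
static uint16_t rotl16(uint16_t X, unsigned N) {
  N %= 16;
  if (N == 0)
    return X;
  return static_cast<uint16_t>((X << N) | (X >> (16 - N)));
}

// Rotate right is rotate left by the complementary amount.
static uint16_t rotr16(uint16_t X, unsigned N) {
  return rotl16(X, 16 - (N % 16));
}

// Swap the two bytes of a 16-bit value.
static uint16_t bswap16(uint16_t X) {
  return static_cast<uint16_t>((X << 8) | (X >> 8));
}

int main() {
  // An i16 rotate by 8 in either direction is exactly a byte swap, which is
  // the equivalence the ROTL_VL/ROTR_VL -> BSWAP_VL combine depends on.
  for (unsigned V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = static_cast<uint16_t>(V);
    assert(rotl16(X, 8) == bswap16(X));
    assert(rotr16(X, 8) == bswap16(X));
  }
  return 0;
}

This also matches the test changes: with Zvbb, a <2 x i8> reverse is done as vrev8.v over a single e16 element, the wider two-element reverses use vror.vi by half the element width, and the <8 x i8>-as-i16 byte-swap shuffle now selects vrev8.v instead of vror.vi with an immediate of 8.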