diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -88,6 +88,7 @@
   GREVIW,
   GORCI,
   GORCIW,
+  SHFLI,
   // Vector Extension
   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
   // for the VL value to be used for the operation.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2545,6 +2545,20 @@
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
     break;
   }
+  case RISCVISD::SHFLI: {
+    // There is no SHFLIW instruction, but we can just promote the operation.
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    SDLoc DL(N);
+    SDValue NewOp0 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue NewRes =
+        DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
+    // ReplaceNodeResults requires we maintain the same type for the return
+    // value.
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+    break;
+  }
   case ISD::BSWAP:
   case ISD::BITREVERSE: {
     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
@@ -2674,19 +2688,21 @@
   }
 };
 
-// Matches any of the following bit-manipulation patterns:
-//   (and (shl x, 1), (0x55555555 << 1))
-//   (and (srl x, 1), 0x55555555)
-//   (shl (and x, 0x55555555), 1)
-//   (srl (and x, (0x55555555 << 1)), 1)
-// where the shift amount and mask may vary thus:
-//   [1]  = 0x55555555 / 0xAAAAAAAA
-//   [2]  = 0x33333333 / 0xCCCCCCCC
-//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
-//   [8]  = 0x00FF00FF / 0xFF00FF00
-//   [16] = 0x0000FFFF / 0xFFFF0000
-//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
-static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
+// Matches patterns of the form
+//   (and (shl x, C2), (C1 << C2))
+//   (and (srl x, C2), C1)
+//   (shl (and x, C1), C2)
+//   (srl (and x, (C1 << C2)), C2)
+// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
+// The expected masks for each shift amount are specified in BitmanipMasks where
+// BitmanipMasks[log2(C2)] specifies the expected C1 value.
+// The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
+// BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
+// XLen is 64.
+static Optional<RISCVBitmanipPat>
+matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
+  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
+         "Unexpected number of masks");
   Optional<uint64_t> Mask;
   // Optionally consume a mask around the shift operation.
   if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -2699,26 +2715,17 @@
   if (!isa<ConstantSDNode>(Op.getOperand(1)))
     return None;
 
-  auto ShAmt = Op.getConstantOperandVal(1);
+  uint64_t ShAmt = Op.getConstantOperandVal(1);
 
-  if (!isPowerOf2_64(ShAmt))
+  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
+  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
     return None;
-
-  // These are the unshifted masks which we use to match bit-manipulation
-  // patterns. They may be shifted left in certain circumstances.
-  static const uint64_t BitmanipMasks[] = {
-      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
-      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
-  };
-
-  unsigned MaskIdx = Log2_64(ShAmt);
-  if (MaskIdx >= array_lengthof(BitmanipMasks))
+  // If we don't have enough masks for 64 bit, then we must be trying to
+  // match SHFL so we're only allowed to shift 1/4 of the width.
+  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
     return None;
 
-  auto Src = Op.getOperand(0);
-
-  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
-  auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+  SDValue Src = Op.getOperand(0);
 
   // The expected mask is shifted left when the AND is found around SHL
   // patterns.
@@ -2745,6 +2752,9 @@
     }
   }
 
+  unsigned MaskIdx = Log2_32(ShAmt);
+  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+
   if (SHLExpMask)
     ExpMask <<= ShAmt;
 
@@ -2754,15 +2764,38 @@
   return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
 }
 
+// Matches any of the following bit-manipulation patterns:
+//   (and (shl x, 1), (0x55555555 << 1))
+//   (and (srl x, 1), 0x55555555)
+//   (shl (and x, 0x55555555), 1)
+//   (srl (and x, (0x55555555 << 1)), 1)
+// where the shift amount and mask may vary thus:
+//   [1]  = 0x55555555 / 0xAAAAAAAA
+//   [2]  = 0x33333333 / 0xCCCCCCCC
+//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
+//   [8]  = 0x00FF00FF / 0xFF00FF00
+//   [16] = 0x0000FFFF / 0xFFFF0000
+//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
+static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
+  // These are the unshifted masks which we use to match bit-manipulation
+  // patterns. They may be shifted left in certain circumstances.
+  static const uint64_t BitmanipMasks[] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+
+  return matchRISCVBitmanipPat(Op, BitmanipMasks);
+}
+
 // Match the following pattern as a GREVI(W) operation
 //   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
+  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
   EVT VT = Op.getValueType();
 
   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
-    auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
-    auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
+    auto LHS = matchGREVIPat(Op.getOperand(0));
+    auto RHS = matchGREVIPat(Op.getOperand(1));
     if (LHS && RHS && LHS->formsPairWith(*RHS)) {
       SDLoc DL(Op);
       return DAG.getNode(
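As a quick reference (not part of the patch, and with a made-up helper name), the identity combineORToGREV relies on can be checked in isolation: a single grev stage with shift K is exactly the OR of the two shifted-and-masked halves that matchGREVIPat recognises, using the unshifted BitmanipMasks entry for log2(K).

  #include <cassert>
  #include <cstdint>

  // Standalone sketch of one 32-bit grev stage:
  //   (or (and (shl x, K), Mask << K), (and (srl x, K), Mask))
  static uint32_t greviStage(uint32_t X, unsigned K, uint32_t Mask) {
    return ((X & Mask) << K) | ((X >> K) & Mask);
  }

  int main() {
    // K = 8 with Mask = 0x00FF00FF swaps the two bytes inside each half-word,
    // the shape that now becomes a single GREVI node.
    assert(greviStage(0x11223344u, 8, 0x00FF00FFu) == 0x22114433u);
    return 0;
  }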
@@ -2784,6 +2817,7 @@
 // 4. (or (rotl/rotr x, bitwidth/2), x)
 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
+  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
   EVT VT = Op.getValueType();
 
   if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
@@ -2822,14 +2856,14 @@
       return SDValue();
     SDValue OrOp0 = Op0.getOperand(0);
     SDValue OrOp1 = Op0.getOperand(1);
-    auto LHS = matchRISCVBitmanipPat(OrOp0);
+    auto LHS = matchGREVIPat(OrOp0);
     // OR is commutable so swap the operands and try again: x might have been
     // on the left
     if (!LHS) {
       std::swap(OrOp0, OrOp1);
-      LHS = matchRISCVBitmanipPat(OrOp0);
+      LHS = matchGREVIPat(OrOp0);
     }
-    auto RHS = matchRISCVBitmanipPat(Op1);
+    auto RHS = matchGREVIPat(Op1);
     if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
       return DAG.getNode(
           RISCVISD::GORCI, DL, VT, LHS->Op,
@@ -2839,6 +2873,102 @@
   return SDValue();
 }
 
+// Matches any of the following bit-manipulation patterns:
+//   (and (shl x, 1), (0x22222222 << 1))
+//   (and (srl x, 1), 0x22222222)
+//   (shl (and x, 0x22222222), 1)
+//   (srl (and x, (0x22222222 << 1)), 1)
+// where the shift amount and mask may vary thus:
+//   [1]  = 0x22222222 / 0x44444444
+//   [2]  = 0x0C0C0C0C / 0x30303030
+//   [4]  = 0x00F000F0 / 0x0F000F00
+//   [8]  = 0x0000FF00 / 0x00FF0000
+//   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
+static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
+  // These are the unshifted masks which we use to match bit-manipulation
+  // patterns. They may be shifted left in certain circumstances.
+  static const uint64_t BitmanipMasks[] = {
+      0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
+      0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
+
+  return matchRISCVBitmanipPat(Op, BitmanipMasks);
+}
+
+// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
+static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
+                               const RISCVSubtarget &Subtarget) {
+  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
+  EVT VT = Op.getValueType();
+
+  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
+    return SDValue();
+
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+
+  // OR is commutable so canonicalize the inner OR to the LHS.
+  if (Op0.getOpcode() != ISD::OR)
+    std::swap(Op0, Op1);
+  if (Op0.getOpcode() != ISD::OR)
+    return SDValue();
+
+  // We found an inner OR, so our operands are the operands of the inner OR
+  // and the other operand of the outer OR.
+  SDValue A = Op0.getOperand(0);
+  SDValue B = Op0.getOperand(1);
+  SDValue C = Op1;
+
+  auto Match1 = matchSHFLPat(A);
+  auto Match2 = matchSHFLPat(B);
+
+  // If neither matched, we failed.
+  if (!Match1 && !Match2)
+    return SDValue();
+
+  // We had at least one match. If one failed, try the remaining C operand.
+  if (!Match1) {
+    std::swap(A, C);
+    Match1 = matchSHFLPat(A);
+    if (!Match1)
+      return SDValue();
+  } else if (!Match2) {
+    std::swap(B, C);
+    Match2 = matchSHFLPat(B);
+    if (!Match2)
+      return SDValue();
+  }
+  assert(Match1 && Match2);
+
+  // Make sure our matches pair up.
+  if (!Match1->formsPairWith(*Match2))
+    return SDValue();
+
+  // All that remains is to make sure C is an AND with the same input that
+  // masks out the bits that are being shuffled.
+  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
+      C.getOperand(0) != Match1->Op)
+    return SDValue();
+
+  uint64_t Mask = C.getConstantOperandVal(1);
+
+  static const uint64_t BitmanipMasks[] = {
+      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
+      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
+  };
+
+  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
+  unsigned MaskIdx = Log2_32(Match1->ShAmt);
+  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+
+  if (Mask != ExpMask)
+    return SDValue();
+
+  SDLoc DL(Op);
+  return DAG.getNode(
+      RISCVISD::SHFLI, DL, VT, Match1->Op,
+      DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
+}
+
 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
@@ -3018,6 +3148,8 @@
       return GREV;
     if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
       return GORC;
+    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
+      return SHFL;
     break;
   case RISCVISD::SELECT_CC: {
     // Transform
@@ -3265,6 +3397,19 @@
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
     return 33;
+  case RISCVISD::SHFLI: {
+    // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
+    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
+    // will stay within the upper 32 bits. If there were more than 32 sign bits
+    // before, there will be at least 33 sign bits after.
+    if (Op.getValueType() == MVT::i64 &&
+        (Op.getConstantOperandVal(1) & 0x10) == 0) {
+      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+      if (Tmp > 32)
+        return 33;
+    }
+    break;
+  }
   case RISCVISD::VMV_X_S:
     // The number of sign bits of the scalar result is computed by obtaining the
     // element type of the input vector operand, subtracting its width from the
@@ -4928,6 +5073,7 @@
   NODE_NAME_CASE(GREVIW)
   NODE_NAME_CASE(GORCI)
   NODE_NAME_CASE(GORCIW)
+  NODE_NAME_CASE(SHFLI)
   NODE_NAME_CASE(VMV_V_X_VL)
   NODE_NAME_CASE(VFMV_V_F_VL)
   NODE_NAME_CASE(VMV_X_S)
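The same kind of standalone check (again not part of the patch; shflStage32 is an invented name) illustrates the three-term OR that combineORToSHFL folds into a SHFLI node: the shifted pair uses the matchSHFLPat mask for log2(K), and the third operand keeps the untouched bits via the keep-mask table checked against C.

  #include <cassert>
  #include <cstdint>

  // One 32-bit shfl stage:
  //   (or (or (and (shl x, K), Mask << K), (and (srl x, K), Mask)),
  //       (and x, KeepMask))
  static uint32_t shflStage32(uint32_t X, unsigned K, uint32_t Mask,
                              uint32_t KeepMask) {
    return ((X & Mask) << K) | ((X >> K) & Mask) | (X & KeepMask);
  }

  int main() {
    // K = 8: Mask = 0x0000FF00, KeepMask = 0xFF0000FF. The two middle bytes
    // swap and the outer bytes stay in place, matching what SHFLI with
    // control 8 computes (see the shfl8_i32 test below).
    assert(shflStage32(0x11223344u, 8, 0x0000FF00u, 0xFF0000FFu) == 0x11332244u);
    return 0;
  }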
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -671,8 +671,10 @@
 def riscv_greviw : SDNode<"RISCVISD::GREVIW", SDTIntBinOp, []>;
 def riscv_gorci : SDNode<"RISCVISD::GORCI", SDTIntBinOp, []>;
 def riscv_gorciw : SDNode<"RISCVISD::GORCIW", SDTIntBinOp, []>;
+def riscv_shfli : SDNode<"RISCVISD::SHFLI", SDTIntBinOp, []>;
 
 let Predicates = [HasStdExtZbp] in {
+def : Pat<(riscv_shfli GPR:$rs1, timm:$shamt), (SHFLI GPR:$rs1, timm:$shamt)>;
 def : Pat<(riscv_grevi GPR:$rs1, timm:$shamt), (GREVI GPR:$rs1, timm:$shamt)>;
 def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;
@@ -789,48 +791,6 @@
 def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>;
 }
 
-let Predicates = [HasStdExtZbp, IsRV32] in {
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
-                  (and GPR:$rs1, (i32 0xFF0000FF))),
-              (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))),
-          (SHFLI GPR:$rs1, (i32 8))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)),
-                  (and GPR:$rs1, (i32 0xF00FF00F))),
-              (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))),
-          (SHFLI GPR:$rs1, (i32 4))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)),
-                  (and GPR:$rs1, (i32 0xC3C3C3C3))),
-              (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))),
-          (SHFLI GPR:$rs1, (i32 2))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)),
-                  (and GPR:$rs1, (i32 0x99999999))),
-              (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))),
-          (SHFLI GPR:$rs1, (i32 1))>;
-} // Predicates = [HasStdExtZbp, IsRV32]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)),
-                  (and GPR:$rs1, (i64 0xFFFF00000000FFFF))),
-              (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))),
-          (SHFLI GPR:$rs1, (i64 16))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)),
-                  (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))),
-              (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))),
-          (SHFLI GPR:$rs1, (i64 8))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)),
-                  (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))),
-              (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))),
-          (SHFLI GPR:$rs1, (i64 4))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)),
-                  (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))),
-              (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))),
-          (SHFLI GPR:$rs1, (i64 2))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
-                  (and GPR:$rs1, (i64 0x9999999999999999))),
-              (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))),
-          (SHFLI GPR:$rs1, (i64 1))>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZba] in {
 def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2),
           (SH1ADD GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -3430,36 +3430,12 @@
 ;
 ; RV64IB-LABEL: shfl1_i32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    lui a1, 629146
-; RV64IB-NEXT:    addiw a1, a1, -1639
-; RV64IB-NEXT:    and a1, a0, a1
-; RV64IB-NEXT:    slli a2, a0, 1
-; RV64IB-NEXT:    lui a3, 279620
-; RV64IB-NEXT:    addiw a3, a3, 1092
-; RV64IB-NEXT:    and a2, a2, a3
-; RV64IB-NEXT:    or a1, a2, a1
-; RV64IB-NEXT:    srli a0, a0, 1
-; RV64IB-NEXT:    lui a2, 139810
-; RV64IB-NEXT:    addiw a2, a2, 546
-; RV64IB-NEXT:    and a0, a0, a2
-; RV64IB-NEXT:    or a0, a1, a0
+; RV64IB-NEXT:    zip.n a0, a0
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: shfl1_i32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    lui a1, 629146
-; RV64IBP-NEXT:    addiw a1, a1, -1639
-; RV64IBP-NEXT:    and a1, a0, a1
-; RV64IBP-NEXT:    slli a2, a0, 1
-; RV64IBP-NEXT:    lui a3, 279620
-; RV64IBP-NEXT:    addiw a3, a3, 1092
-; RV64IBP-NEXT:    and a2, a2, a3
-; RV64IBP-NEXT:    or a1, a2, a1
-; RV64IBP-NEXT:    srli a0, a0, 1
-; RV64IBP-NEXT:    lui a2, 139810
-; RV64IBP-NEXT:    addiw a2, a2, 546
-; RV64IBP-NEXT:    and a0, a0, a2
-; RV64IBP-NEXT:    or a0, a1, a0
+; RV64IBP-NEXT:    zip.n a0, a0
 ; RV64IBP-NEXT:    ret
   %and = and i32 %a, -1717986919
   %shl = shl i32 %a, 1
@@ -3540,36 +3516,12 @@
 ;
 ; RV64IB-LABEL: shfl2_i32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    lui a1, 801852
-; RV64IB-NEXT:    addiw a1, a1, 963
-; RV64IB-NEXT:    and a1, a0, a1
-; RV64IB-NEXT:    slli a2, a0, 2
-; RV64IB-NEXT:    lui a3, 197379
-; RV64IB-NEXT:    addiw a3, a3, 48
-; RV64IB-NEXT:    and a2, a2, a3
-; RV64IB-NEXT:    or a1, a2, a1
-; RV64IB-NEXT:    srli a0, a0, 2
-; RV64IB-NEXT:    lui a2, 49345
-; RV64IB-NEXT:    addiw a2, a2, -1012
-; RV64IB-NEXT:    and a0, a0, a2
-; RV64IB-NEXT:    or a0, a0, a1
+; RV64IB-NEXT:    zip2.b a0, a0
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: shfl2_i32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    lui a1, 801852
-; RV64IBP-NEXT:    addiw a1, a1, 963
-; RV64IBP-NEXT:    and a1, a0, a1
-; RV64IBP-NEXT:    slli a2, a0, 2
-; RV64IBP-NEXT:    lui a3, 197379
-; RV64IBP-NEXT:    addiw a3, a3, 48
-; RV64IBP-NEXT:    and a2, a2, a3
-; RV64IBP-NEXT:    or a1, a2, a1
-; RV64IBP-NEXT:    srli a0, a0, 2
-; RV64IBP-NEXT:    lui a2, 49345
-; RV64IBP-NEXT:    addiw a2, a2, -1012
-; RV64IBP-NEXT:    and a0, a0, a2
-; RV64IBP-NEXT:    or a0, a0, a1
+; RV64IBP-NEXT:    zip2.b a0, a0
 ; RV64IBP-NEXT:    ret
   %and = and i32 %a, -1010580541
   %shl = shl i32 %a, 2
@@ -3652,36 +3604,12 @@
 ;
 ; RV64IB-LABEL: shfl4_i32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    lui a1, 983295
-; RV64IB-NEXT:    addiw a1, a1, 15
-; RV64IB-NEXT:    and a1, a0, a1
-; RV64IB-NEXT:    slli a2, a0, 4
-; RV64IB-NEXT:    lui a3, 61441
-; RV64IB-NEXT:    addiw a3, a3, -256
-; RV64IB-NEXT:    and a2, a2, a3
-; RV64IB-NEXT:    srli a0, a0, 4
-; RV64IB-NEXT:    lui a3, 3840
-; RV64IB-NEXT:    addiw a3, a3, 240
-; RV64IB-NEXT:    and a0, a0, a3
-; RV64IB-NEXT:    or a0, a0, a1
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    zip4.h a0, a0
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: shfl4_i32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    lui a1, 983295
-; RV64IBP-NEXT:    addiw a1, a1, 15
-; RV64IBP-NEXT:    and a1, a0, a1
-; RV64IBP-NEXT:    slli a2, a0, 4
-; RV64IBP-NEXT:    lui a3, 61441
-; RV64IBP-NEXT:    addiw a3, a3, -256
-; RV64IBP-NEXT:    and a2, a2, a3
-; RV64IBP-NEXT:    srli a0, a0, 4
-; RV64IBP-NEXT:    lui a3, 3840
-; RV64IBP-NEXT:    addiw a3, a3, 240
-; RV64IBP-NEXT:    and a0, a0, a3
-; RV64IBP-NEXT:    or a0, a0, a1
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    zip4.h a0, a0
 ; RV64IBP-NEXT:    ret
   %and = and i32 %a, -267390961
   %shl = shl i32 %a, 4
@@ -3761,34 +3689,12 @@
 ;
 ; RV64IB-LABEL: shfl8_i32:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    lui a1, 1044480
-; RV64IB-NEXT:    addiw a1, a1, 255
-; RV64IB-NEXT:    and a1, a0, a1
-; RV64IB-NEXT:    slli a2, a0, 8
-; RV64IB-NEXT:    lui a3, 4080
-; RV64IB-NEXT:    and a2, a2, a3
-; RV64IB-NEXT:    srli a0, a0, 8
-; RV64IB-NEXT:    lui a3, 16
-; RV64IB-NEXT:    addiw a3, a3, -256
-; RV64IB-NEXT:    and a0, a0, a3
-; RV64IB-NEXT:    or a0, a1, a0
-; RV64IB-NEXT:    or a0, a0, a2
+; RV64IB-NEXT:    zip8.w a0, a0
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBP-LABEL: shfl8_i32:
 ; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    lui a1, 1044480
-; RV64IBP-NEXT:    addiw a1, a1, 255
-; RV64IBP-NEXT:    and a1, a0, a1
-; RV64IBP-NEXT:    slli a2, a0, 8
-; RV64IBP-NEXT:    lui a3, 4080
-; RV64IBP-NEXT:    and a2, a2, a3
-; RV64IBP-NEXT:    srli a0, a0, 8
-; RV64IBP-NEXT:    lui a3, 16
-; RV64IBP-NEXT:    addiw a3, a3, -256
-; RV64IBP-NEXT:    and a0, a0, a3
-; RV64IBP-NEXT:    or a0, a1, a0
-; RV64IBP-NEXT:    or a0, a0, a2
+; RV64IBP-NEXT:    zip8.w a0, a0
 ; RV64IBP-NEXT:    ret
   %and = and i32 %a, -16776961
   %shl = shl i32 %a, 8
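Why the i32 cases above can go through a promoted i64 SHFLI: with bit 4 of the control clear, a shfl stage never moves bits across the 32-bit halves, so bit 31 and everything above it is preserved, which is the same argument the new RISCVISD::SHFLI sign-bit case makes when it returns 33. A rough sketch of that reasoning (reference code only, not from the patch; the masks are the ones from the removed RV64 isel patterns):

  #include <cassert>
  #include <cstdint>

  // One 64-bit shfl stage, same shape as the 32-bit version.
  static uint64_t shflStage64(uint64_t X, unsigned K, uint64_t Mask,
                              uint64_t KeepMask) {
    return ((X & Mask) << K) | ((X >> K) & Mask) | (X & KeepMask);
  }

  int main() {
    // Control 8 (bit 4 clear) applied to a sign-extended i32 value: only bits
    // inside each 32-bit half move, so at least 33 sign bits survive.
    uint64_t X = 0xFFFFFFFF80112233ULL; // sext of 0x80112233
    uint64_t R = shflStage64(X, 8, 0x0000FF000000FF00ULL, 0xFF0000FFFF0000FFULL);
    assert(R == 0xFFFFFFFF80221133ULL);
    assert((R >> 31) == 0x1FFFFFFFFULL); // 33 leading one bits remain
    return 0;
  }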