diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4396,6 +4396,12 @@
   bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG,
                  bool IsNegative = false) const;
 
+  /// Expands scalar BSWAP nodes. Returns SDValue() if the expansion
+  /// fails.
+  /// \param N Node to expand
+  /// \returns The expansion result or SDValue() if it fails.
+  SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;
+
   /// Turn load of vector type into a load of the individual elements.
   /// \param LD load to expand
   /// \returns BUILD_VECTOR and TokenFactor nodes.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -182,7 +182,6 @@
   SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
 
   SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
-  SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
   SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
@@ -2846,58 +2845,6 @@
   return Tmp;
 }
 
-/// Open code the operations for BSWAP of the specified operation.
-SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
-  EVT VT = Op.getValueType();
-  EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
-  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
-  default: llvm_unreachable("Unhandled Expand type in BSWAP!");
-  case MVT::i16:
-    // Use a rotate by 8. This can be further expanded if necessary.
-    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-  case MVT::i32:
-    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
-                       DAG.getConstant(0xFF0000, dl, VT));
-    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
-    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
-    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
-    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
-  case MVT::i64:
-    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
-    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
-    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
-    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
-    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
-                       DAG.getConstant(255ULL<<48, dl, VT));
-    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
-                       DAG.getConstant(255ULL<<40, dl, VT));
-    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
-                       DAG.getConstant(255ULL<<32, dl, VT));
-    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
-                       DAG.getConstant(255ULL<<24, dl, VT));
-    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
-                       DAG.getConstant(255ULL<<16, dl, VT));
-    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
-                       DAG.getConstant(255ULL<<8 , dl, VT));
-    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
-    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
-    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
-    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
-    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
-    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
-    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
-  }
-}
-
 /// Open code the operations for PARITY of the specified operation.
 SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
   EVT VT = Op.getValueType();
@@ -2949,7 +2896,8 @@
     Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
     break;
   case ISD::BSWAP:
-    Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+    if ((Tmp1 = TLI.expandBSWAP(Node, DAG)))
+      Results.push_back(Tmp1);
     break;
   case ISD::PARITY:
     Results.push_back(ExpandPARITY(Node->getOperand(0), dl));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -455,6 +455,15 @@
   EVT NVT = Op.getValueType();
   SDLoc dl(N);
 
+  // If the larger BSWAP isn't supported by the target, try to expand now.
+  // If we expand later we'll end up with more operations since we lost the
+  // original type. We only do this for scalars since we have a shuffle
+  // based lowering for vectors in LegalizeVectorOps.
+  if (!OVT.isVector() && !TLI.isOperationLegalOrCustom(ISD::BSWAP, NVT)) {
+    if (SDValue Res = TLI.expandBSWAP(N, DAG))
+      return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
+  }
+
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
   EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
   return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7006,6 +7006,64 @@
   return true;
 }
 
+SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+
+  if (!VT.isSimple())
+    return SDValue();
+
+  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::i16:
+    // Use a rotate by 8. This can be further expanded if necessary.
+    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+  case MVT::i32:
+    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+                       DAG.getConstant(0xFF0000, dl, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
+                       DAG.getConstant(255ULL<<48, dl, VT));
+    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
+                       DAG.getConstant(255ULL<<40, dl, VT));
+    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
+                       DAG.getConstant(255ULL<<32, dl, VT));
+    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+                       DAG.getConstant(255ULL<<24, dl, VT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+                       DAG.getConstant(255ULL<<16, dl, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+                       DAG.getConstant(255ULL<<8 , dl, VT));
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+  }
+}
+
 std::pair<SDValue, SDValue>
 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                     SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -17,23 +17,22 @@
 define i16 @test_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bswap_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    lui a2, 4080
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    lui a1, 16
+; RV32I-NEXT:    addi a1, a1, -256
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    srli a1, a1, 8
+; RV32I-NEXT:    slli a0, a0, 8
 ; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    srli a0, a0, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bswap_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 40
-; RV64I-NEXT:    addi a2, zero, 255
-; RV64I-NEXT:    slli a2, a2, 48
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    lui a1, 16
+; RV64I-NEXT:    addiw a1, a1, -256
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    srli a1, a1, 8
+; RV64I-NEXT:    slli a0, a0, 8
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    ret
   %tmp = call i16 @llvm.bswap.i16(i16 %a)
   ret i16 %tmp
@@ -58,21 +57,18 @@
 ;
 ; RV64I-LABEL: test_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    addi a2, zero, 255
-; RV64I-NEXT:    slli a3, a2, 32
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a4, a2, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    slli a2, a2, 48
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %tmp = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %tmp
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
@@ -825,21 +825,19 @@
 define signext i32 @bswap_i32(i32 signext %a) nounwind {
 ; RV64I-LABEL: bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    addi a2, zero, 255
-; RV64I-NEXT:    slli a3, a2, 32
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a4, a2, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    slli a2, a2, 48
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srai a0, a0, 32
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64IB-LABEL: bswap_i32:
@@ -860,21 +858,18 @@
 define void @bswap_i32_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: bswap_i32_nosext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 32
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
-; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a3, a3, 48
-; RV64I-NEXT:    and a3, a4, a3
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addiw a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    slli a3, a0, 8
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    sw a0, 0(a1)
 ; RV64I-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -2981,21 +2981,19 @@
 define signext i32 @bswap_i32(i32 signext %a) nounwind {
 ; RV64I-LABEL: bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    addi a2, zero, 255
-; RV64I-NEXT:    slli a3, a2, 32
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a4, a2, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    slli a2, a2, 48
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srai a0, a0, 32
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64IB-LABEL: bswap_i32:
@@ -3015,21 +3013,18 @@
 define void @bswap_i32_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-LABEL: bswap_i32_nosext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 32
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
-; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a3, a3, 48
-; RV64I-NEXT:    and a3, a4, a3
-; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srliw a2, a0, 8
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addiw a3, a3, -256
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    slli a3, a0, 8
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    sw a0, 0(a1)
 ; RV64I-NEXT:    ret
 ;
@@ -3422,39 +3417,20 @@
 define i32 @bswap_rotr_i32(i32 %a) {
 ; RV64I-LABEL: bswap_rotr_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
 ; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 8
-; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 24
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    srli a2, a0, 40
-; RV64I-NEXT:    lui a4, 16
-; RV64I-NEXT:    addiw a4, a4, -256
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    srli a4, a0, 56
-; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    srliw a2, a0, 24
 ; RV64I-NEXT:    or a1, a1, a2
 ; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
-; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a3, a3, 48
-; RV64I-NEXT:    and a3, a4, a3
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 16
-; RV64I-NEXT:    lui a2, 1048560
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    or a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    srliw a0, a0, 16
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -3475,39 +3451,20 @@
 define i32 @bswap_rotl_i32(i32 %a) {
 ; RV64I-LABEL: bswap_rotl_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
 ; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 8
-; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 24
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    srli a2, a0, 40
-; RV64I-NEXT:    lui a4, 16
-; RV64I-NEXT:    addiw a4, a4, -256
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    srli a4, a0, 56
-; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    srliw a2, a0, 24
 ; RV64I-NEXT:    or a1, a1, a2
 ; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
-; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a3, a3, 48
-; RV64I-NEXT:    and a3, a4, a3
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 16
-; RV64I-NEXT:    lui a2, 1048560
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    or a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    srliw a0, a0, 16
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
@@ -3528,35 +3485,35 @@
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 8
-; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 24
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    srli a2, a0, 40
-; RV64I-NEXT:    lui a4, 16
-; RV64I-NEXT:    addiw a4, a4, -256
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    srli a4, a0, 56
-; RV64I-NEXT:    or a2, a2, a4
-; RV64I-NEXT:    or a4, a1, a2
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a2, a3, 32
-; RV64I-NEXT:    and a2, a1, a2
+; RV64I-NEXT:    srli a2, a0, 24
+; RV64I-NEXT:    lui a6, 4080
+; RV64I-NEXT:    and a2, a2, a6
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    addi a4, zero, 255
+; RV64I-NEXT:    slli a5, a4, 24
+; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    or a3, a3, a2
+; RV64I-NEXT:    srli a5, a0, 40
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a7, a2, -256
+; RV64I-NEXT:    and a5, a5, a7
+; RV64I-NEXT:    srli a1, a0, 56
+; RV64I-NEXT:    or a1, a5, a1
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    slli a3, a0, 8
+; RV64I-NEXT:    slli a5, a4, 32
+; RV64I-NEXT:    and a3, a3, a5
 ; RV64I-NEXT:    slli a5, a0, 24
-; RV64I-NEXT:    slli a6, a3, 40
-; RV64I-NEXT:    and a5, a5, a6
-; RV64I-NEXT:    or a5, a5, a2
-; RV64I-NEXT:    slli a1, a0, 40
-; RV64I-NEXT:    slli a2, a3, 48
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a2, a4, 40
+; RV64I-NEXT:    and a2, a5, a2
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    slli a3, a0, 40
+; RV64I-NEXT:    slli a4, a4, 48
+; RV64I-NEXT:    and a3, a3, a4
 ; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    or a0, a0, a5
-; RV64I-NEXT:    or a0, a0, a4
 ; RV64I-NEXT:    lui a1, 3855
 ; RV64I-NEXT:    addiw a1, a1, 241
 ; RV64I-NEXT:    slli a1, a1, 12
@@ -3567,15 +3524,15 @@
 ; RV64I-NEXT:    addi a1, a1, -241
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a1, a1, 4
-; RV64I-NEXT:    lui a3, 1044721
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, 241
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -241
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, 240
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 1044721
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 4
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    lui a1, 13107
@@ -3588,15 +3545,15 @@
 ; RV64I-NEXT:    addi a1, a1, 819
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a1, a1, 2
-; RV64I-NEXT:    lui a3, 1035469
-; RV64I-NEXT:    addiw a3, a3, -819
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -819
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -819
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -820
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 1035469
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    lui a1, 21845
@@ -3609,30 +3566,28 @@
 ; RV64I-NEXT:    addi a1, a1, 1365
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a1, a1, 1
-; RV64I-NEXT:    lui a3, 1026731
-; RV64I-NEXT:    addiw a3, a3, -1365
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -1365
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -1365
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, -1366
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 1026731
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 1
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    slli a1, a0, 24
-; RV64I-NEXT:    addi a3, zero, -1
-; RV64I-NEXT:    slli a3, a3, 56
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a4, a0, 24
-; RV64I-NEXT:    or a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 8
-; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    lui a2, 1044480
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 40
+; RV64I-NEXT:    and a2, a2, a7
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    srli a0, a0, 24
+; RV64I-NEXT:    and a0, a0, a6
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    ret
 ;
 ; RV64IB-LABEL: bitreverse_bswap_i32:
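
Note on the expansion pattern (a standalone sketch for illustration, not part of the patch): the MVT::i32 case of expandBSWAP builds the classic shift-and-mask byte swap out of ISD::SHL/SRL/AND/OR nodes. The following self-contained C++ analogue mirrors that node sequence and can be used to sanity-check the masks; the helper name and test values are made up for this note, and __builtin_bswap32 is the GCC/Clang builtin.

    #include <cassert>
    #include <cstdint>

    // Mirrors the DAG built by expandBSWAP for MVT::i32: the four byte
    // lanes are moved into place with shifts and masks, then ORed together.
    static uint32_t bswap32Expansion(uint32_t Op) {
      uint32_t Tmp4 = Op << 24;               // byte 0 -> byte 3
      uint32_t Tmp3 = (Op << 8) & 0xFF0000u;  // byte 1 -> byte 2
      uint32_t Tmp2 = (Op >> 8) & 0xFF00u;    // byte 2 -> byte 1
      uint32_t Tmp1 = Op >> 24;               // byte 3 -> byte 0
      return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
    }

    int main() {
      assert(bswap32Expansion(0x12345678u) == 0x78563412u);
      assert(bswap32Expansion(0x12345678u) == __builtin_bswap32(0x12345678u));
      return 0;
    }

The MVT::i16 case is a single rotate by 8, and MVT::i64 applies the same pattern to eight byte lanes. This is also why the early expansion added to the BSWAP promotion path in LegalizeIntegerTypes.cpp helps: expanding i32 before promotion emits the four-lane sequence above, while promoting to i64 first would emit the eight-lane sequence plus a shift of the result back down, which is exactly the longer RV64I code removed from the tests.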