diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -47,9 +47,11 @@
   BSTRINS,
   BSTRPICK,
 
-  // Byte swapping operations
+  // Byte-swapping and bit-reversal
   REVB_2H,
   REVB_2W,
+  BITREV_4B,
+  BITREV_W,
 };
 
 } // end namespace LoongArchISD
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -80,6 +80,16 @@
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
   }
 
+  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
+  // we get to know which of sll and revb.2h is faster.
+  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+    setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+  } else {
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+  }
+
   static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
                                                ISD::SETUGT, ISD::SETUGE};
@@ -466,6 +476,27 @@
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
     break;
   }
+  case ISD::BITREVERSE: {
+    SDValue Src = N->getOperand(0);
+    EVT VT = N->getValueType(0);
+    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
+           "Unexpected custom legalization");
+    MVT GRLenVT = Subtarget.getGRLenVT();
+    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
+    SDValue Tmp;
+    switch (VT.getSizeInBits()) {
+    default:
+      llvm_unreachable("Unexpected operand width");
+    case 8:
+      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
+      break;
+    case 32:
+      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
+      break;
+    }
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
+    break;
+  }
   }
 }
@@ -791,6 +822,21 @@
   return SDValue();
 }
 
+// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
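+// This is correct because revb.2w reverses the byte order within each 32-bit
+// word and bitrev.w then reverses all 32 bits of the low word; composed, they
+// reverse the bits inside each byte while keeping the byte order unchanged,
+// which is exactly what bitrev.4b computes.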
+static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const LoongArchSubtarget &Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  if (Src.getOpcode() != LoongArchISD::REVB_2W)
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
+                     Src.getOperand(0));
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -803,6 +849,8 @@
     return performORCombine(N, DAG, DCI, Subtarget);
   case ISD::SRL:
     return performSRLCombine(N, DAG, DCI, Subtarget);
+  case LoongArchISD::BITREV_W:
+    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
   }
   return SDValue();
 }
@@ -897,6 +945,8 @@
     NODE_NAME_CASE(FTINT)
     NODE_NAME_CASE(REVB_2H)
     NODE_NAME_CASE(REVB_2W)
+    NODE_NAME_CASE(BITREV_4B)
+    NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -59,6 +59,8 @@
     : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
 def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
 def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
+def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>;
+def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -840,17 +842,25 @@
           (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
 } // Predicates = [IsLA64]
 
-/// Byte-swapping
+/// Byte-swapping and bit-reversal
 
 def : Pat<(loongarch_revb_2h GPR:$rj), (REVB_2H GPR:$rj)>;
+def : Pat<(loongarch_bitrev_4b GPR:$rj), (BITREV_4B GPR:$rj)>;
 
 let Predicates = [IsLA32] in {
 def : Pat<(bswap GPR:$rj), (ROTRI_W (REVB_2H GPR:$rj), 16)>;
+def : Pat<(bitreverse GPR:$rj), (BITREV_W GPR:$rj)>;
+def : Pat<(bswap (bitreverse GPR:$rj)), (BITREV_4B GPR:$rj)>;
+def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_4B GPR:$rj)>;
 } // Predicates = [IsLA32]
 
 let Predicates = [IsLA64] in {
 def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>;
 def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>;
+def : Pat<(loongarch_bitrev_w GPR:$rj), (BITREV_W GPR:$rj)>;
+def : Pat<(bitreverse GPR:$rj), (BITREV_D GPR:$rj)>;
+def : Pat<(bswap (bitreverse GPR:$rj)), (BITREV_8B GPR:$rj)>;
+def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_8B GPR:$rj)>;
 } // Predicates = [IsLA64]
 
 /// Loads
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA64
+
+declare i7 @llvm.bitreverse.i7(i7)
+declare i8 @llvm.bitreverse.i8(i8)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i24 @llvm.bitreverse.i24(i24)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i48 @llvm.bitreverse.i48(i48)
+declare i64 @llvm.bitreverse.i64(i64)
+declare i77 @llvm.bitreverse.i77(i77)
+declare i128 @llvm.bitreverse.i128(i128)
+
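+;; i8 is expected to lower to a single bitrev.4b, while i16 and other
+;; sub-word widths lower to a native-width bitrev followed by a logical
+;; right shift.
+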
+define i8 @test_bitreverse_i8(i8 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.4b $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.4b $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i8 @llvm.bitreverse.i8(i8 %a)
+  ret i8 %tmp
+}
+
+define i16 @test_bitreverse_i16(i16 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    srli.w $a0, $a0, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %tmp
+}
+
+define i32 @test_bitreverse_i32(i32 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %tmp
+}
+
+define i64 @test_bitreverse_i64(i64 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a2, $a1
+; LA32-NEXT:    bitrev.w $a1, $a0
+; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %tmp
+}
+
+;; Bitreverse on non-native integer widths.
+
+define i7 @test_bitreverse_i7(i7 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i7:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    srli.w $a0, $a0, 25
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i7:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 57
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i7 @llvm.bitreverse.i7(i7 %a)
+  ret i7 %tmp
+}
+
+define i24 @test_bitreverse_i24(i24 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i24:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    srli.w $a0, $a0, 8
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i24:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 40
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i24 @llvm.bitreverse.i24(i24 %a)
+  ret i24 %tmp
+}
+
+define i48 @test_bitreverse_i48(i48 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i48:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.w $a1, $a1
+; LA32-NEXT:    srli.w $a1, $a1, 16
+; LA32-NEXT:    bitrev.w $a2, $a0
+; LA32-NEXT:    slli.w $a0, $a2, 16
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    srli.w $a1, $a2, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i48:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 16
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i48 @llvm.bitreverse.i48(i48 %a)
+  ret i48 %tmp
+}
+
+define i77 @test_bitreverse_i77(i77 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i77:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    ld.w $a3, $a1, 4
+; LA32-NEXT:    bitrev.w $a3, $a3
+; LA32-NEXT:    srli.w $a4, $a3, 19
+; LA32-NEXT:    slli.w $a5, $a2, 13
+; LA32-NEXT:    or $a4, $a5, $a4
+; LA32-NEXT:    srli.w $a2, $a2, 19
+; LA32-NEXT:    st.h $a2, $a0, 8
+; LA32-NEXT:    st.w $a4, $a0, 4
+; LA32-NEXT:    slli.w $a2, $a3, 13
+; LA32-NEXT:    ld.w $a1, $a1, 8
+; LA32-NEXT:    bitrev.w $a1, $a1
+; LA32-NEXT:    srli.w $a1, $a1, 19
+; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i77:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a1, $a1
+; LA64-NEXT:    srli.d $a1, $a1, 51
+; LA64-NEXT:    bitrev.d $a2, $a0
+; LA64-NEXT:    slli.d $a0, $a2, 13
+; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    srli.d $a1, $a2, 51
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i77 @llvm.bitreverse.i77(i77 %a)
+  ret i77 %tmp
+}
+
+define i128 @test_bitreverse_i128(i128 %a) nounwind {
+; LA32-LABEL: test_bitreverse_i128:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 12
+; LA32-NEXT:    bitrev.w $a1, $a1
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_i128:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.d $a2, $a1
+; LA64-NEXT:    bitrev.d $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i128 @llvm.bitreverse.i128(i128 %a)
+  ret i128 %tmp
+}
diff --git a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA64
+
+declare i16 @llvm.bitreverse.i16(i16)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i64 @llvm.bitreverse.i64(i64)
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+
+define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
+; LA32-LABEL: test_bswap_bitreverse_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    srli.w $a0, $a0, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_bitreverse_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.2h $a0, $a0
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp)
+  ret i16 %tmp2
+}
+
+define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
+; LA32-LABEL: test_bswap_bitreverse_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.4b $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_bitreverse_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.4b $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp)
+  ret i32 %tmp2
+}
+
+define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
+; LA32-LABEL: test_bswap_bitreverse_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.4b $a0, $a0
+; LA32-NEXT:    bitrev.4b $a1, $a1
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_bitreverse_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp)
+  ret i64 %tmp2
+}
+
+define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
+; LA32-LABEL: test_bitreverse_bswap_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    bitrev.w $a0, $a0
+; LA32-NEXT:    srli.w $a0, $a0, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_bswap_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.2h $a0, $a0
+; LA64-NEXT:    bitrev.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+  %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
+  ret i16 %tmp2
+}
+
+define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
+; LA32-LABEL: test_bitreverse_bswap_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.4b $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_bswap_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.4b $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+  %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
+  ret i32 %tmp2
+}
+
+define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
+; LA32-LABEL: test_bitreverse_bswap_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bitrev.4b $a0, $a0
+; LA32-NEXT:    bitrev.4b $a1, $a1
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bitreverse_bswap_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bitrev.8b $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+  %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
+  ret i64 %tmp2
+}
+
+define i32 @pr55484(i32 %0) {
+; LA32-LABEL: pr55484:
+; LA32:       # %bb.0:
+; LA32-NEXT:    slli.w $a1, $a0, 8
+; LA32-NEXT:    srli.w $a0, $a0, 8
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: pr55484:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    srli.d $a0, $a0, 8
+; LA64-NEXT:    or $a0, $a0, $a1
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %2 = lshr i32 %0, 8
+  %3 = shl i32 %0, 8
+  %4 = or i32 %2, %3
+  %5 = trunc i32 %4 to i16
+  %6 = sext i16 %5 to i32
+  ret i32 %6
+}
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=LA32
-; RUN: llc -mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=LA64
 
 declare i16 @llvm.bswap.i16(i16)