diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,9 @@
   BSTRINS,
   BSTRPICK,
 
+  // Byte-swapping operations
+  REVB_2H,
+  REVB_2W,
 };
 
 } // end namespace LoongArchISD
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -65,6 +65,14 @@
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
   }
 
+  // LA32 does not have REVB.2W and REVB.D due to their 64-bit operands, and
+  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
+  // and i32 can still be byte-swapped relatively cheaply.
+  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
+  }
+
   static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
                                                ISD::SETUGT, ISD::SETUGE};
 
@@ -127,6 +135,10 @@
     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
     return SDValue();
+  case ISD::BSWAP:
+    // All custom-legalized BSWAP types are illegal and are handled in
+    // ReplaceNodeResults; returning SDValue() requests the default expansion.
+    return SDValue();
   case ISD::ConstantPool:
     return lowerConstantPool(Op, DAG);
   case ISD::FP_TO_SINT:
@@ -396,6 +408,29 @@
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
     break;
   }
+  case ISD::BSWAP: {
+    SDValue Src = N->getOperand(0);
+    EVT VT = N->getValueType(0);
+    assert((VT == MVT::i16 || VT == MVT::i32) &&
+           "Unexpected custom legalization");
+    MVT GRLenVT = Subtarget.getGRLenVT();
+    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
+    SDValue Tmp;
+    switch (VT.getSizeInBits()) {
+    default:
+      llvm_unreachable("Unexpected operand width");
+    case 16:
+      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
+      break;
+    case 32:
+      // Only LA64 can get here, due to the size mismatch between VT and
+      // GRLenVT; the LA32 lowering is a pattern in LoongArchInstrInfo.td.
+      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
+      break;
+    }
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
+    break;
+  }
   }
 }
 
@@ -825,6 +860,8 @@
   NODE_NAME_CASE(MOVGR2FR_W_LA64)
   NODE_NAME_CASE(MOVFR2GR_S_LA64)
   NODE_NAME_CASE(FTINT)
+  NODE_NAME_CASE(REVB_2H)
+  NODE_NAME_CASE(REVB_2W)
   }
 #undef NODE_NAME_CASE
   return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -55,6 +55,8 @@
     : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
 def loongarch_bstrpick
     : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
+def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
+def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -816,6 +818,19 @@
           (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
 } // Predicates = [IsLA64]
 
+/// Byte-swapping
+
+def : Pat<(loongarch_revb_2h GPR:$rj), (REVB_2H GPR:$rj)>;
+
+let Predicates = [IsLA32] in {
+def : Pat<(bswap GPR:$rj), (ROTRI_W (REVB_2H GPR:$rj), 16)>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>;
+def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>;
+} // Predicates = [IsLA64]
+
 /// Loads
 
 multiclass LdPat {
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=loongarch32 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA32
+; RUN: llc -mtriple=loongarch64 --verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=LA64
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i48 @llvm.bswap.i48(i48)
+declare i64 @llvm.bswap.i64(i64)
+declare i80 @llvm.bswap.i80(i80)
+declare i128 @llvm.bswap.i128(i128)
+
+define i16 @test_bswap_i16(i16 %a) nounwind {
+; LA32-LABEL: test_bswap_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.2h $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.bswap.i16(i16 %a)
+  ret i16 %tmp
+}
+
+define i32 @test_bswap_i32(i32 %a) nounwind {
+; LA32-LABEL: test_bswap_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    rotri.w $a0, $a0, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.2w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %tmp
+}
+
+define i64 @test_bswap_i64(i64 %a) nounwind {
+; LA32-LABEL: test_bswap_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a1, $a1
+; LA32-NEXT:    rotri.w $a2, $a1, 16
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    rotri.w $a1, $a0, 16
+; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %tmp
+}
+
+;; Bswap on non-native integer widths.
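+;; These are legalized generically: the value is widened or split into
+;; native-width pieces, each piece is byte-swapped as above, and the result
+;; is reassembled with shifts and ORs.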
+
+define i48 @test_bswap_i48(i48 %a) nounwind {
+; LA32-LABEL: test_bswap_i48:
+; LA32:       # %bb.0:
+; LA32-NEXT:    revb.2h $a1, $a1
+; LA32-NEXT:    rotri.w $a1, $a1, 16
+; LA32-NEXT:    srli.w $a1, $a1, 16
+; LA32-NEXT:    revb.2h $a0, $a0
+; LA32-NEXT:    rotri.w $a2, $a0, 16
+; LA32-NEXT:    slli.w $a0, $a2, 16
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    srli.w $a1, $a2, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i48:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.d $a0, $a0
+; LA64-NEXT:    srli.d $a0, $a0, 16
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i48 @llvm.bswap.i48(i48 %a)
+  ret i48 %tmp
+}
+
+define i80 @test_bswap_i80(i80 %a) nounwind {
+; LA32-LABEL: test_bswap_i80:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    ld.w $a3, $a1, 4
+; LA32-NEXT:    revb.2h $a3, $a3
+; LA32-NEXT:    rotri.w $a3, $a3, 16
+; LA32-NEXT:    srli.w $a4, $a3, 16
+; LA32-NEXT:    slli.w $a5, $a2, 16
+; LA32-NEXT:    or $a4, $a5, $a4
+; LA32-NEXT:    srli.w $a2, $a2, 16
+; LA32-NEXT:    st.h $a2, $a0, 8
+; LA32-NEXT:    st.w $a4, $a0, 4
+; LA32-NEXT:    slli.w $a2, $a3, 16
+; LA32-NEXT:    ld.w $a1, $a1, 8
+; LA32-NEXT:    revb.2h $a1, $a1
+; LA32-NEXT:    rotri.w $a1, $a1, 16
+; LA32-NEXT:    srli.w $a1, $a1, 16
+; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i80:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.d $a1, $a1
+; LA64-NEXT:    srli.d $a1, $a1, 48
+; LA64-NEXT:    revb.d $a2, $a0
+; LA64-NEXT:    slli.d $a0, $a2, 16
+; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    srli.d $a1, $a2, 48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i80 @llvm.bswap.i80(i80 %a)
+  ret i80 %tmp
+}
+
+define i128 @test_bswap_i128(i128 %a) nounwind {
+; LA32-LABEL: test_bswap_i128:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 12
+; LA32-NEXT:    revb.2h $a1, $a1
+; LA32-NEXT:    rotri.w $a1, $a1, 16
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_bswap_i128:
+; LA64:       # %bb.0:
+; LA64-NEXT:    revb.d $a2, $a1
+; LA64-NEXT:    revb.d $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i128 @llvm.bswap.i128(i128 %a)
+  ret i128 %tmp
+}
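
Note on the LA32 patterns: the i32 case relies on the identity
bswap32(x) == rotr32(revb2h(x), 16), since REVB.2H byte-swaps each 16-bit
half and the rotate by 16 then exchanges the two halves. Below is a minimal
host-side C++ sketch of that reasoning. It is not part of the patch, and the
helpers revb_2h, rotri_w and bswap32_ref are made-up models of the
corresponding instructions:

    #include <cassert>
    #include <cstdint>

    // Models REVB.2H: byte-swap each 16-bit half of a 32-bit value.
    static uint32_t revb_2h(uint32_t x) {
      return ((x & 0x00FF00FFu) << 8) | ((x & 0xFF00FF00u) >> 8);
    }

    // Models ROTRI.W: rotate a 32-bit value right by a constant amount.
    static uint32_t rotri_w(uint32_t x, unsigned n) {
      return (x >> n) | (x << (32 - n));
    }

    // Reference byte swap, written independently of the helpers above.
    static uint32_t bswap32_ref(uint32_t x) {
      return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) |
             (x >> 24);
    }

    int main() {
      for (uint32_t x : {0x01234567u, 0xDEADBEEFu, 0u, 0xFFFFFFFFu}) {
        // i32 on LA32: (ROTRI_W (REVB_2H x), 16) == bswap(x).
        assert(rotri_w(revb_2h(x), 16) == bswap32_ref(x));
        // i16: REVB.2H on the any-extended value leaves the swapped
        // halfword in the low 16 bits, which the truncate then keeps.
        uint16_t h = static_cast<uint16_t>(x);
        assert(static_cast<uint16_t>(revb_2h(h)) ==
               static_cast<uint16_t>((h << 8) | (h >> 8)));
      }
      return 0;
    }

The same identity explains why LA64 needs no rotate for i32: REVB.2W already
swaps the bytes within each 32-bit half of a 64-bit register, so a single
instruction suffices there, and REVB.D covers the full 64-bit bswap.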