[LoongArch] Select BSTRPICK for bit-field extraction patterns

This patch adds a target-specific LoongArchISD::BSTRPICK node together with an
ISD::AND DAG combine (performANDCombine) that runs once operation legalization
has started. The combine requires the AND's second operand to be a constant
shifted mask and rewrites two shapes into BSTRPICK(src, msb, lsb):

  * and ((sra|srl) src, lsb), (2**len - 1)   => msb = lsb + len - 1
    (only when the mask starts at bit 0 and lsb + len fits in the value type)
  * and src, (2**len - 1) with mask > 0xfff  => lsb = 0, msb = len - 1
    (masks <= 0xfff are left alone so that andi is used instead)

The shift-amount matching code in LoongArchISelDAGToDAG.cpp additionally looks
through a BSTRPICK feeding a shift amount when lsb == 0 and
Log2_32(ShiftWidth) <= msb + 1. TableGen patterns select BSTRPICK_W under
IsLA32 and BSTRPICK_D under IsLA64. New tests bstrpick_w.ll / bstrpick_d.ll
cover the combine, and the existing lshr.ll and sext-zext-trunc.ll
expectations are updated to the shorter bstrpick sequences.

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LoongArchISelDAGToDAG.h"
+#include "LoongArchISelLowering.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
 #include "llvm/Support/KnownBits.h"
@@ -91,6 +92,17 @@
       ShAmt = N.getOperand(0);
       return true;
     }
+  } else if (N.getOpcode() == LoongArchISD::BSTRPICK) {
+    // Similar to the above AND, if there is a BSTRPICK on the shift amount, we
+    // can bypass it.
+    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+    assert(isa<ConstantSDNode>(N.getOperand(1)) && "Illegal msb operand!");
+    assert(isa<ConstantSDNode>(N.getOperand(2)) && "Illegal lsb operand!");
+    uint64_t msb = N.getConstantOperandVal(1), lsb = N.getConstantOperandVal(2);
+    if (lsb == 0 && Log2_32(ShiftWidth) <= msb + 1) {
+      ShAmt = N.getOperand(0);
+      return true;
+    }
   } else if (N.getOpcode() == ISD::SUB &&
              isa<ConstantSDNode>(N.getOperand(0))) {
     uint64_t Imm = N.getConstantOperandVal(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -34,6 +34,8 @@
   SRA_W,
   SRL_W,
 
+  BSTRPICK,
+
 };
 } // namespace LoongArchISD
 
@@ -51,6 +53,8 @@
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
+  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
   // This method returns the name of a target specific DAG node.
   const char *getTargetNodeName(unsigned Opcode) const override;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -73,6 +73,8 @@
   // Function alignments.
   const Align FunctionAlignment(4);
   setMinFunctionAlignment(FunctionAlignment);
+
+  setTargetDAGCombine(ISD::AND);
 }
 
 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
@@ -238,6 +240,81 @@
   }
 }
 
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchSubtarget &Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue FirstOperand = N->getOperand(0);
+  SDValue SecondOperand = N->getOperand(1);
+  unsigned FirstOperandOpc = FirstOperand.getOpcode();
+  EVT ValTy = N->getValueType(0);
+  SDLoc DL(N);
+  uint64_t lsb, msb;
+  unsigned SMIdx, SMLen;
+  ConstantSDNode *CN;
+  SDValue NewOperand;
+  MVT GRLenVT = Subtarget.getGRLenVT();
+
+  // Op's second operand must be a shifted mask.
+  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
+      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
+    return SDValue();
+
+  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
+    // Pattern match BSTRPICK.
+    //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
+    //  => BSTRPICK $dst, $src, msb, lsb
+    //  where msb = lsb + len - 1
+
+    // The second operand of the shift must be an immediate.
+    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+      return SDValue();
+
+    lsb = CN->getZExtValue();
+
+    // Return if the shifted mask does not start at bit 0 or the sum of its
+    // length and lsb exceeds the word's size.
+    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
+      return SDValue();
+
+    NewOperand = FirstOperand.getOperand(0);
+  } else {
+    // Pattern match BSTRPICK.
+    //  $dst = and $src, (2**len- 1) , if len > 12
+    //  => BSTRPICK $dst, $src, msb, lsb
+    //  where lsb = 0 and msb = len - 1
+
+    // If the mask is <= 0xfff, andi can be used instead.
+    if (CN->getZExtValue() <= 0xfff)
+      return SDValue();
+
+    // Return if the mask doesn't start at position 0.
+    if (SMIdx)
+      return SDValue();
+
+    lsb = 0;
+    NewOperand = FirstOperand;
+  }
+  msb = lsb + SMLen - 1;
+  return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
+                     DAG.getConstant(msb, DL, GRLenVT),
+                     DAG.getConstant(lsb, DL, GRLenVT));
+}
+
+SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
+                                                   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case ISD::AND:
+    return performANDCombine(N, DAG, DCI, Subtarget);
+  }
+  return SDValue();
+}
+
 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((LoongArchISD::NodeType)Opcode) {
   case LoongArchISD::FIRST_NUMBER:
@@ -252,6 +329,7 @@
     NODE_NAME_CASE(SLL_W)
     NODE_NAME_CASE(SRA_W)
     NODE_NAME_CASE(SRL_W)
+    NODE_NAME_CASE(BSTRPICK)
   }
 #undef NODE_NAME_CASE
   return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -19,6 +19,10 @@
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
 ]>;
 
+def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
+  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
+]>;
+
 // TODO: Add LoongArch specific DAG Nodes
 // Target-dependent nodes.
 def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
@@ -26,6 +30,8 @@
 def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrpick
+    : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -647,6 +653,16 @@
 def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
                 PseudoInstExpansion<(JIRL R0, R1, 0)>;
 
+/// BSTRPICK
+
+let Predicates = [IsLA32] in
+def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+          (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+
+let Predicates = [IsLA64] in
+def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+          (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
+
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll
@@ -0,0 +1,49 @@
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+define i64 @lshr40_and255(i64 %a) {
+; CHECK-LABEL: lshr40_and255:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 47, 40
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %shr = lshr i64 %a, 40
+  %and = and i64 %shr, 255
+  ret i64 %and
+}
+
+define i64 @ashr50_and511(i64 %a) {
+; CHECK-LABEL: ashr50_and511:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 58, 50
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %shr = ashr i64 %a, 50
+  %and = and i64 %shr, 511
+  ret i64 %and
+}
+
+define i64 @zext_i32_to_i64(i32 %a) {
+; CHECK-LABEL: zext_i32_to_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %res = zext i32 %a to i64
+  ret i64 %res
+}
+
+define i64 @and8191(i64 %a) {
+; CHECK-LABEL: and8191:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 12, 0
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 8191
+  ret i64 %and
+}
+
+; Check that andi but not bstrpick.d is generated.
+define i64 @and4095(i64 %a) {
+; CHECK-LABEL: and4095:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a0, $a0, 4095
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 4095
+  ret i64 %and
+}
diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll
@@ -0,0 +1,49 @@
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s
+
+define i32 @lshr10_and255(i32 %a) {
+; CHECK-LABEL: lshr10_and255:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.w $a0, $a0, 17, 10
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %shr = lshr i32 %a, 10
+  %and = and i32 %shr, 255
+  ret i32 %and
+}
+
+define i32 @ashr20_and511(i32 %a) {
+; CHECK-LABEL: ashr20_and511:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.w $a0, $a0, 28, 20
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %shr = ashr i32 %a, 20
+  %and = and i32 %shr, 511
+  ret i32 %and
+}
+
+define i32 @zext_i16_to_i32(i16 %a) {
+; CHECK-LABEL: zext_i16_to_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %res = zext i16 %a to i32
+  ret i32 %res
+}
+
+define i32 @and8191(i32 %a) {
+; CHECK-LABEL: and8191:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrpick.w $a0, $a0, 12, 0
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 8191
+  ret i32 %and
+}
+
+; Check that andi but not bstrpick.w is generated.
+define i32 @and4095(i32 %a) {
+; CHECK-LABEL: and4095:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a0, $a0, 4095
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 4095
+  ret i32 %and
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
@@ -34,17 +34,13 @@
 define i16 @lshr_i16(i16 %x, i16 %y) {
 ; LA32-LABEL: lshr_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    and $a0, $a0, $a2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    srl.w $a0, $a0, $a1
 ; LA32-NEXT:    jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: lshr_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    and $a0, $a0, $a2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    srl.d $a0, $a0, $a1
 ; LA64-NEXT:    jirl $zero, $ra, 0
   %lshr = lshr i16 %x, %y
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll
@@ -257,16 +257,12 @@
 define i32 @zext_i16_to_i32(i16 %a) {
 ; LA32-LABEL: zext_i16_to_i32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: zext_i16_to_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    jirl $zero, $ra, 0
   %1 = zext i16 %a to i32
   ret i32 %1
@@ -275,17 +271,13 @@
 define i64 @zext_i16_to_i64(i16 %a) {
 ; LA32-LABEL: zext_i16_to_i64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    move $a1, $zero
 ; LA32-NEXT:    jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: zext_i16_to_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    jirl $zero, $ra, 0
   %1 = zext i16 %a to i64
   ret i64 %1
@@ -299,9 +291,7 @@
 ;
 ; LA64-LABEL: zext_i32_to_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.w $a1, $zero, -1
-; LA64-NEXT:    lu32i.d $a1, 0
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
 ; LA64-NEXT:    jirl $zero, $ra, 0
   %1 = zext i32 %a to i64
   ret i64 %1