diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -35,6 +35,7 @@
   SRA_W,
   SRL_W,
 
+  BSTRINS,
   BSTRPICK,
 
 };
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -88,6 +88,7 @@
   setMinFunctionAlignment(FunctionAlignment);
 
   setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::SRL);
 }
 
@@ -402,6 +403,131 @@
   return SDValue();
 }
 
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const LoongArchSubtarget &Subtarget) {
+  MVT GRLenVT = Subtarget.getGRLenVT();
+  EVT ValTy = N->getValueType(0);
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  ConstantSDNode *CN0, *CN1;
+  SDLoc DL(N);
+  unsigned ValBits = ValTy.getSizeInBits();
+  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
+  unsigned Shamt;
+  bool SwapAndRetried = false;
+
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  if (ValBits != 32 && ValBits != 64)
+    return SDValue();
+
+Retry:
+  // 1st pattern to match BSTRINS:
+  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
+  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
+  //  =>
+  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+  if (N0.getOpcode() == ISD::AND &&
+      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+      (MaskIdx0 + MaskLen0 <= ValBits))
+    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+                       N1.getOperand(0).getOperand(0),
+                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
+
+  // 2nd pattern to match BSTRINS:
+  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
+  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
+  //  =>
+  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+  if (N0.getOpcode() == ISD::AND &&
+      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
+      (MaskIdx0 + MaskLen0 <= ValBits))
+    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+                       N1.getOperand(0).getOperand(0),
+                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
+
+  // 3rd pattern to match BSTRINS:
+  //  R = or (and X, mask0), (and Y, mask1)
+  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
+  //  =>
+  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
+  //  where msb = lsb + size - 1
+  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+      (MaskIdx0 + MaskLen0 <= 64) &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
+      (CN1->getSExtValue() & CN0->getSExtValue()) == 0)
+    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
+                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
+                       DAG.getConstant(ValBits == 32
+                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
+                                           : (MaskIdx0 + MaskLen0 - 1),
+                                       DL, GRLenVT),
+                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
+
+  // 4th pattern to match BSTRINS:
+  //  R = or (and X, mask), (shl Y, shamt)
+  //  where mask = (2**shamt - 1)
+  //  =>
+  //  R = BSTRINS X, Y, ValBits - 1, shamt
+  //  where ValBits = 32 or 64
+  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
+      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
+      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
+      (MaskIdx0 + MaskLen0 <= ValBits))
+    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+                       N1.getOperand(0),
+                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
+                       DAG.getConstant(Shamt, DL, GRLenVT));
+
+  // 5th pattern to match BSTRINS:
+  //  R = or (and X, mask), const
+  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
+  //  =>
+  //  R = BSTRINS X, (const >> lsb), msb, lsb
+  //  where msb = lsb + size - 1
+  if (N0.getOpcode() == ISD::AND &&
+      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
+      (CN1->getSExtValue() & CN0->getSExtValue()) == 0)
+    return DAG.getNode(
+        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
+        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+        DAG.getConstant(MaskIdx0, DL, GRLenVT));
+
+  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
+  if (!SwapAndRetried) {
+    std::swap(N0, N1);
+    SwapAndRetried = true;
+    goto Retry;
+  }
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -410,6 +536,8 @@
     break;
   case ISD::AND:
     return performANDCombine(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombine(N, DAG, DCI, Subtarget);
   case ISD::SRL:
     return performSRLCombine(N, DAG, DCI, Subtarget);
   }
@@ -479,6 +607,7 @@
   NODE_NAME_CASE(SLL_W)
   NODE_NAME_CASE(SRA_W)
   NODE_NAME_CASE(SRL_W)
+  NODE_NAME_CASE(BSTRINS)
   NODE_NAME_CASE(BSTRPICK)
   }
 #undef NODE_NAME_CASE
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -26,6 +26,11 @@
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
 ]>;
 
+def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [
+  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>,
+  SDTCisSameAs<3, 4>
+]>;
+
 def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
   SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
 ]>;
@@ -46,6 +51,8 @@
 def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
 def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
 def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrins
+    : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
 def loongarch_bstrpick
     : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
 
@@ -757,15 +764,21 @@
 def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
                 PseudoInstExpansion<(JIRL R0, R1, 0)>;
 
-/// BSTRPICK
+/// BSTRINS and BSTRPICK
 
-let Predicates = [IsLA32] in
+let Predicates = [IsLA32] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+          (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
 def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
           (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+} // Predicates = [IsLA32]
 
-let Predicates = [IsLA64] in
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+          (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
 def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
           (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
+} // Predicates = [IsLA64]
 
 /// Loads
 
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_d.ll b/llvm/test/CodeGen/LoongArch/bstrins_d.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrins_d.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+;; Test generation of the bstrins.d instruction.
+;; There are 5 patterns that can be matched to bstrins.d. See performORCombine
+;; for details.
+
+;; Pattern 1
+;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i64 @pat1(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+  %shl = shl i64 %b, 16
+  %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+define i64 @pat1_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat1_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+  %shl = shl i64 %b, 16
+  %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
+  %or = or i64 %and2, %and1
+  ret i64 %or
+}
+
+;; Pattern 2
+;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i64 @pat2(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+  %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
+  %shl = shl i64 %and2, 16
+  %or = or i64 %and1, %shl
+  ret i64 %or
+}
+
+define i64 @pat2_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat2_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 39, 16
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
+  %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
+  %shl = shl i64 %and2, 16
+  %or = or i64 %shl, %and1
+  ret i64 %or
+}
+
+;; Pattern 3
+;; R = or (and X, mask0), (and Y, mask1)
+;; =>
+;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
+define i64 @pat3(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a1, $a1, 288
+; CHECK-NEXT:    srli.d $a1, $a1, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 11, 4
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
+  %and2 = and i64 %b, 288 ; 0x0000000000000120
+  %or = or i64 %and1, %and2
+  ret i64 %or
+}
+
+define i64 @pat3_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat3_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a1, $a1, 288
+; CHECK-NEXT:    srli.d $a1, $a1, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 11, 4
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
+  %and2 = and i64 %b, 288 ; 0x0000000000000120
+  %or = or i64 %and2, %and1
+  ret i64 %or
+}
+
+;; Pattern 4
+;; R = or (and X, mask), (shl Y, shamt)
+;; =>
+;; R = BSTRINS X, Y, 63, shamt
+define i64 @pat4(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 255
+  %shl = shl i64 %b, 8
+  %or = or i64 %and, %shl
+  ret i64 %or
+}
+
+define i64 @pat4_swap(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: pat4_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 255
+  %shl = shl i64 %b, 8
+  %or = or i64 %shl, %and
+  ret i64 %or
+}
+
+;; Pattern 5
+;; R = or (and X, mask0), const
+;; =>
+;; R = BSTRINS X, (const >> lsb), msb, lsb
+define i64 @pat5(i64 %a) nounwind {
+; CHECK-LABEL: pat5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lu12i.w $a1, 74565
+; CHECK-NEXT:    ori $a1, $a1, 1656
+; CHECK-NEXT:    bstrins.d $a0, $a1, 47, 16
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
+  %or = or i64 %and, 20015998304256 ; 0x0000123456780000
+  ret i64 %or
+}
+
+;; Test that bstrins.d is not generated because the constant OR operand
+;; doesn't fit into the bits cleared by the constant AND operand.
+define i64 @no_bstrins_d(i64 %a) nounwind {
+; CHECK-LABEL: no_bstrins_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lu12i.w $a1, 354185
+; CHECK-NEXT:    lu32i.d $a1, 4660
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    lu12i.w $a1, 354191
+; CHECK-NEXT:    ori $a1, $a1, 4095
+; CHECK-NEXT:    lu32i.d $a1, -60876
+; CHECK-NEXT:    and $a0, $a0, $a1
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
+  %or = or i64 %and, 20015998341120 ; 0x0000123456789000
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s
+
+;; Test generation of the bstrins.w instruction.
+;; There are 5 patterns that can be matched to bstrins.w. See performORCombine
+;; for details.
+
+;; Pattern 1
+;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i32 @pat1(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -1048321 ; 0xfff000ff
+  %shl = shl i32 %b, 8
+  %and2 = and i32 %shl, 1048320 ; 0x000fff00
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+define i32 @pat1_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat1_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -1048321 ; 0xfff000ff
+  %shl = shl i32 %b, 8
+  %and2 = and i32 %shl, 1048320 ; 0x000fff00
+  %or = or i32 %and2, %and1
+  ret i32 %or
+}
+
+;; Pattern 2
+;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
+;; =>
+;; R = BSTRINS X, Y, msb, lsb
+define i32 @pat2(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -1048321 ; 0xfff000ff
+  %and2 = and i32 %b, 4095 ; 0x00000fff
+  %shl = shl i32 %and2, 8
+  %or = or i32 %and1, %shl
+  ret i32 %or
+}
+
+define i32 @pat2_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat2_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 19, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -1048321 ; 0xfff000ff
+  %and2 = and i32 %b, 4095 ; 0x00000fff
+  %shl = shl i32 %and2, 8
+  %or = or i32 %shl, %and1
+  ret i32 %or
+}
+
+;; Pattern 3
+;; R = or (and X, mask0), (and Y, mask1)
+;; =>
+;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
+define i32 @pat3(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a1, $a1, 288
+; CHECK-NEXT:    srli.w $a1, $a1, 4
+; CHECK-NEXT:    bstrins.w $a0, $a1, 11, 4
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -4081 ; 0xfffff00f
+  %and2 = and i32 %b, 288 ; 0x00000120
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+define i32 @pat3_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi $a1, $a1, 288
+; CHECK-NEXT:    srli.w $a1, $a1, 4
+; CHECK-NEXT:    bstrins.w $a0, $a1, 11, 4
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, -4081 ; 0xfffff00f
+  %and2 = and i32 %b, 288 ; 0x00000120
+  %or = or i32 %and2, %and1
+  ret i32 %or
+}
+
+define i32 @pat3_positive_mask0(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat3_positive_mask0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli.w $a1, $a1, 28
+; CHECK-NEXT:    bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and1 = and i32 %a, 268435455 ; 0x0fffffff
+  %and2 = and i32 %b, 4026531840 ; 0xf0000000
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+;; Pattern 4
+;; R = or (and X, mask), (shl Y, shamt)
+;; =>
+;; R = BSTRINS X, Y, 31, shamt
+define i32 @pat4(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 268435455 ; 0x0fffffff
+  %shl = shl i32 %b, 28
+  %or = or i32 %and, %shl
+  ret i32 %or
+}
+
+define i32 @pat4_swap(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pat4_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bstrins.w $a0, $a1, 31, 28
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 268435455 ; 0x0fffffff
+  %shl = shl i32 %b, 28
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+;; Pattern 5
+;; R = or (and X, mask), const
+;; =>
+;; R = BSTRINS X, (const >> lsb), msb, lsb
+define i32 @pat5(i32 %a) nounwind {
+; CHECK-LABEL: pat5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lu12i.w $a1, 1
+; CHECK-NEXT:    ori $a1, $a1, 564
+; CHECK-NEXT:    bstrins.w $a0, $a1, 23, 8
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 4278190335 ; 0xff0000ff
+  %or = or i32 %and, 1192960 ; 0x00123400
+  ret i32 %or
+}
+
+;; Test that bstrins.w is not generated because the constant OR operand
+;; doesn't fit into the bits cleared by the constant AND operand.
+define i32 @no_bstrins_w(i32 %a) nounwind {
+; CHECK-LABEL: no_bstrins_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lu12i.w $a1, 291
+; CHECK-NEXT:    ori $a1, $a1, 1104
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    lu12i.w $a1, -3805
+; CHECK-NEXT:    ori $a1, $a1, 1279
+; CHECK-NEXT:    and $a0, $a0, $a1
+; CHECK-NEXT:    jirl $zero, $ra, 0
+  %and = and i32 %a, 4278190335 ; 0xff0000ff
+  %or = or i32 %and, 1193040 ; 0x00123450
+  ret i32 %or
+}
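
For context, a minimal C++ sketch (illustrative only, not part of the patch) of the kind of source-level bit-field update that should produce the 2nd pattern above and, with this combine, a single bstrins.d on loongarch64 (compare pat2 in bstrins_d.ll). The function name and constants are made up for the example.

// Illustrative sketch only; assumes the frontend lowers this to
// or (and X, mask0), (shl (and Y, mask1), lsb), i.e. the 2nd pattern.
#include <cstdint>

uint64_t insert_bits_39_16(uint64_t x, uint64_t y) {
  // Clear bits [39:16] of x and insert the low 24 bits of y there.
  return (x & ~(0xFFFFFFULL << 16)) | ((y & 0xFFFFFFULL) << 16);
}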