diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -51,6 +51,7 @@
   bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool tryShrinkShlLogicImm(SDNode *Node);
+  bool trySignedBitfieldExtract(SDNode *Node);
 
   bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
   bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -661,6 +661,73 @@
   return true;
 }
 
+bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
+  // Only supported with XTHeadBb at the moment.
+  if (!Subtarget->hasVendorXTHeadBb())
+    return false;
+
+  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+  if (!N1C)
+    return false;
+
+  SDValue N0 = Node->getOperand(0);
+  if (!N0.hasOneUse())
+    return false;
+
+  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
+                             MVT VT) {
+    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
+                                  CurDAG->getTargetConstant(Msb, DL, VT),
+                                  CurDAG->getTargetConstant(Lsb, DL, VT));
+  };
+
+  SDLoc DL(Node);
+  MVT VT = Node->getSimpleValueType(0);
+  const unsigned RightShAmt = N1C->getZExtValue();
+
+  // Transform (sra (shl X, C1) C2) with C1 < C2
+  //        -> (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SHL) {
+    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (!N01C)
+      return false;
+
+    const unsigned LeftShAmt = N01C->getZExtValue();
+    // Make sure that this is a bitfield extraction (i.e., the shift-right
+    // amount can not be less than the left-shift).
+    if (LeftShAmt > RightShAmt)
+      return false;
+
+    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
+    const unsigned Msb = MsbPlusOne - 1;
+    const unsigned Lsb = RightShAmt - LeftShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  // Transform (sra (sext_inreg X, _), C) ->
+  //           (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    unsigned ExtSize =
+        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+
+    // ExtSize of 32 should use sraiw via tablegen pattern.
+    if (ExtSize == 32)
+      return false;
+
+    const unsigned Msb = ExtSize - 1;
+    const unsigned Lsb = RightShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  return false;
+}
+
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
   if (Node->isMachineOpcode()) {
@@ -846,6 +913,9 @@
     return;
   }
   case ISD::SRA: {
+    if (trySignedBitfieldExtract(Node))
+      return;
+
     // Optimize (sra (sext_inreg X, i16), C) ->
     //          (srai (slli X, (XLen-16), (XLen-16) + C)
     // And (sra (sext_inreg X, i8), C) ->
@@ -886,9 +956,25 @@
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
     if (!N1C)
      break;
+    uint64_t C1 = N1C->getZExtValue();
+    const bool isC1Mask = isMask_64(C1);
+    const bool isC1ANDI = isInt<12>(C1);
 
     SDValue N0 = Node->getOperand(0);
 
+    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
+                                          SDValue X, unsigned Msb,
+                                          unsigned Lsb) {
+      if (!Subtarget->hasVendorXTHeadBb())
+        return false;
+
+      SDNode *TH_EXTU = CurDAG->getMachineNode(
+          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
+          CurDAG->getTargetConstant(Lsb, DL, VT));
+      ReplaceNode(Node, TH_EXTU);
+      return true;
+    };
+
     bool LeftShift = N0.getOpcode() == ISD::SHL;
     if (LeftShift || N0.getOpcode() == ISD::SRL) {
       auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -898,8 +984,6 @@
       unsigned XLen = Subtarget->getXLen();
       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
 
-      uint64_t C1 = N1C->getZExtValue();
-
       // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
@@ -922,7 +1006,7 @@
 
       // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
-      if (!LeftShift && isMask_64(C1)) {
+      if (!LeftShift && isC1Mask) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
@@ -951,6 +1035,18 @@
            return;
          }
 
+          // Try to use an unsigned bitfield extract (e.g., th.extu) if
+          // available.
+          // Transform (and (srl x, C2), C1)
+          //        -> (<bfextract> x, msb, lsb)
+          //
+          // Make sure to keep this below the SRLIW cases, as we always want to
+          // prefer the more common instruction.
+          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
+          const unsigned Lsb = C2;
+          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
+            return;
+
           // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
@@ -1068,6 +1164,17 @@
       }
     }
 
+    // If C1 masks off the upper bits only (but can't be formed as an
+    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
+    // available.
+    // Transform (and x, C1)
+    //        -> (<bfextract> x, msb, lsb)
+    if (isC1Mask && !isC1ANDI) {
+      const unsigned Msb = llvm::bit_width(C1) - 1;
+      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
+        return;
+    }
+
     if (tryShrinkShlLogicImm(Node))
       return;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -315,10 +315,6 @@
 def : Pat<(rotl GPR:$rs1, GPR:$rs2),
           (OR (SLL GPR:$rs1, GPR:$rs2), (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
 
-//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
-//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
-def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
-def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
diff --git a/llvm/test/CodeGen/RISCV/bitextract-mac.ll b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
--- a/llvm/test/CodeGen/RISCV/bitextract-mac.ll
+++ b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
@@ -46,10 +46,8 @@
 ; RV32XTHEADBB-LABEL: f:
 ; RV32XTHEADBB:       # %bb.0: # %entry
 ; RV32XTHEADBB-NEXT:    mul a0, a1, a0
-; RV32XTHEADBB-NEXT:    slli a1, a0, 26
-; RV32XTHEADBB-NEXT:    srli a1, a1, 28
-; RV32XTHEADBB-NEXT:    slli a0, a0, 20
-; RV32XTHEADBB-NEXT:    srli a0, a0, 25
+; RV32XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
 ; RV32XTHEADBB-NEXT:    mul a0, a1, a0
 ; RV32XTHEADBB-NEXT:    add a0, a0, a2
 ; RV32XTHEADBB-NEXT:    ret
@@ -68,10 +66,8 @@
 ; RV32XTHEAD-LABEL: f:
 ; RV32XTHEAD:       # %bb.0: # %entry
 ; RV32XTHEAD-NEXT:    mul a0, a1, a0
-; RV32XTHEAD-NEXT:    slli a1, a0, 26
-; RV32XTHEAD-NEXT:    srli a1, a1, 28
-; RV32XTHEAD-NEXT:    slli a0, a0, 20
-; RV32XTHEAD-NEXT:    srli a0, a0, 25
+; RV32XTHEAD-NEXT:    th.extu a1, a0, 5, 2
+; RV32XTHEAD-NEXT:    th.extu a0, a0, 11, 5
 ; RV32XTHEAD-NEXT:    th.mulah a2, a1, a0
 ; RV32XTHEAD-NEXT:    mv a0, a2
 ; RV32XTHEAD-NEXT:    ret
@@ -111,22 +107,18 @@
 ;
 ; RV64XTHEADBB-LABEL: f:
 ; RV64XTHEADBB:       # %bb.0: # %entry
-; RV64XTHEADBB-NEXT:    mulw a0, a1, a0
-; RV64XTHEADBB-NEXT:    slli a1, a0, 58
-; RV64XTHEADBB-NEXT:    srli a1, a1, 60
-; RV64XTHEADBB-NEXT:    slli a0, a0, 52
-; RV64XTHEADBB-NEXT:    srli a0, a0, 57
+; RV64XTHEADBB-NEXT:    mul a0, a1, a0
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
 ; RV64XTHEADBB-NEXT:    mulw a0, a1, a0
 ; RV64XTHEADBB-NEXT:    addw a0, a0, a2
 ; RV64XTHEADBB-NEXT:    ret
 ;
 ; RV64XTHEAD-LABEL: f:
 ; RV64XTHEAD:       # %bb.0: # %entry
-; RV64XTHEAD-NEXT:    mulw a0, a1, a0
-; RV64XTHEAD-NEXT:    slli a1, a0, 58
-; RV64XTHEAD-NEXT:    srli a1, a1, 60
-; RV64XTHEAD-NEXT:    slli a0, a0, 52
-; RV64XTHEAD-NEXT:    srli a0, a0, 57
+; RV64XTHEAD-NEXT:    mul a0, a1, a0
+; RV64XTHEAD-NEXT:    th.extu a1, a0, 5, 2
+; RV64XTHEAD-NEXT:    th.extu a0, a0, 11, 5
 ; RV64XTHEAD-NEXT:    th.mulah a2, a1, a0
 ; RV64XTHEAD-NEXT:    mv a0, a2
 ; RV64XTHEAD-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -1623,8 +1623,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a7, a0
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB17_2
 ; RV32XTHEADBB-NEXT:    # %bb.1:
@@ -2098,8 +2097,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a6, a1
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_2
 ; RV32XTHEADBB-NEXT:    # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -338,6 +338,23 @@
   ret i32 %shr
 }
 
+define i32 @no_sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: no_sexth_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 17
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: no_sexth_i32:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    slli a0, a0, 17
+; RV32XTHEADBB-NEXT:    srai a0, a0, 16
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 17
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
 define i64 @sexth_i64(i64 %a) nounwind {
 ; RV32I-LABEL: sexth_i64:
 ; RV32I:       # %bb.0:
@@ -356,6 +373,25 @@
   ret i64 %shr
 }
 
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: no_sexth_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 17
+; RV32I-NEXT:    srai a0, a1, 16
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: no_sexth_i64:
+; RV32XTHEADBB:       # %bb.0:
+; RV32XTHEADBB-NEXT:    slli a1, a0, 17
+; RV32XTHEADBB-NEXT:    srai a0, a1, 16
+; RV32XTHEADBB-NEXT:    srai a1, a1, 31
+; RV32XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 49
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
 define i32 @zexth_i32(i32 %a) nounwind {
 ; RV32I-LABEL: zexth_i32:
 ; RV32I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -627,6 +627,23 @@
   ret i32 %shr
 }
 
+define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: no_sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 49
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i32:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 49
+; RV64XTHEADBB-NEXT:    srai a0, a0, 48
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i32 %a, 17
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
 define i64 @sexth_i64(i64 %a) nounwind {
 ; RV64I-LABEL: sexth_i64:
 ; RV64I:       # %bb.0:
@@ -643,6 +660,23 @@
   ret i64 %shr
 }
 
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: no_sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 49
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    slli a0, a0, 49
+; RV64XTHEADBB-NEXT:    srai a0, a0, 48
+; RV64XTHEADBB-NEXT:    ret
+  %shl = shl i64 %a, 49
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
 define i32 @zexth_i32(i32 %a) nounwind {
 ; RV64I-LABEL: zexth_i32:
 ; RV64I:       # %bb.0:
@@ -673,6 +707,37 @@
   ret i64 %and
 }
 
+define i64 @zext_bf_i64(i64 %a) nounwind {
+; RV64I-LABEL: zext_bf_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 47
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_bf_i64:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 1
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
+define i64 @zext_i64_srliw(i64 %a) nounwind {
+; RV64I-LABEL: zext_i64_srliw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: zext_i64_srliw:
+; RV64XTHEADBB:       # %bb.0:
+; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
+; RV64XTHEADBB-NEXT:    ret
+  %1 = lshr i64 %a, 16
+  %and = and i64 %1, 65535
+  ret i64 %and
+}
+
 declare i32 @llvm.bswap.i32(i32)
 
 define signext i32 @bswap_i32(i32 signext %a) nounwind {