diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -51,6 +51,7 @@
   bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool tryShrinkShlLogicImm(SDNode *Node);
+  bool trySignedBitfieldExtract(SDNode *Node);
 
   bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
   bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -661,6 +661,68 @@
   return true;
 }
 
+bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
+  // Only supported with XTHeadBb at the moment.
+  if (!Subtarget->hasVendorXTHeadBb())
+    return false;
+
+  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+  if (!N1C)
+    return false;
+
+  SDValue N0 = Node->getOperand(0);
+  if (!N0.hasOneUse())
+    return false;
+
+  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
+                             MVT VT) {
+    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
+                                  CurDAG->getTargetConstant(Msb, DL, VT),
+                                  CurDAG->getTargetConstant(Lsb, DL, VT));
+  };
+
+  SDLoc DL(Node);
+  MVT VT = Node->getSimpleValueType(0);
+  const unsigned RightShAmt = N1C->getZExtValue();
+
+  // Transform (sra (shl X, C1) C2) ->
+  //           (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SHL) {
+    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (!N01C)
+      return false;
+
+    const unsigned LeftShAmt = N01C->getZExtValue();
+    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
+    const unsigned Msb = MsbPlusOne - 1;
+    const unsigned Lsb = RightShAmt - LeftShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  // Transform (sra (sext_inreg X, _), C) ->
+  //           (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    unsigned ExtSize =
+        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+
+    // ExtSize of 32 should use sraiw via tablegen pattern.
+    if (ExtSize == 32)
+      return false;
+
+    const unsigned Msb = ExtSize - 1;
+    const unsigned Lsb = RightShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  return false;
+}
+
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
   if (Node->isMachineOpcode()) {
@@ -846,6 +908,9 @@
     return;
   }
   case ISD::SRA: {
+    if (trySignedBitfieldExtract(Node))
+      return;
+
     // Optimize (sra (sext_inreg X, i16), C) ->
     //          (srai (slli X, (XLen-16), (XLen-16) + C)
     // And (sra (sext_inreg X, i8), C) ->
@@ -886,9 +951,25 @@
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
     if (!N1C)
       break;
+    uint64_t C1 = N1C->getZExtValue();
+    const bool isC1Mask = isMask_64(C1);
+    const bool isC1ANDI = isInt<12>(C1);
 
     SDValue N0 = Node->getOperand(0);
 
+    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
+                                          SDValue X, unsigned Msb,
+                                          unsigned Lsb) {
+      if (!Subtarget->hasVendorXTHeadBb())
+        return false;
+
+      SDNode *TH_EXTU = CurDAG->getMachineNode(
+          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
+          CurDAG->getTargetConstant(Lsb, DL, VT));
+      ReplaceNode(Node, TH_EXTU);
+      return true;
+    };
+
     bool LeftShift = N0.getOpcode() == ISD::SHL;
     if (LeftShift || N0.getOpcode() == ISD::SRL) {
       auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -898,8 +979,6 @@
       unsigned XLen = Subtarget->getXLen();
       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
 
-      uint64_t C1 = N1C->getZExtValue();
-
       // Keep track of whether this is a c.andi. If we can't use c.andi, the
       // shift pair might offer more compression opportunities.
       // TODO: We could check for C extension here, but we don't have many lit
@@ -922,7 +1001,15 @@
 
       // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
-      if (!LeftShift && isMask_64(C1)) {
+      if (!LeftShift && isC1Mask) {
+        // Try to use an unsigned bitfield extract (e.g., th.extu) if available.
+        // Transform (and (srl x, C2), C1)
+        //        -> (<bfextract> x, msb, lsb)
+        const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
+        const unsigned Lsb = C2;
+        if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
+          return;
+
         unsigned Leading = XLen - llvm::bit_width(C1);
         if (C2 < Leading) {
           // If the number of leading zeros is C2+32 this can be SRLIW.
@@ -1068,6 +1155,17 @@
       }
     }
 
+    // If C1 masks off the upper bits only (but can't be formed as an
+    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
+    // available.
+    // Transform (and x, C1)
+    //        -> (<bfextract> x, msb, lsb)
+    if (isC1Mask && !isC1ANDI) {
+      const unsigned Msb = llvm::bit_width(C1) - 1;
+      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
+        return;
+    }
+
     if (tryShrinkShlLogicImm(Node))
       return;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -314,10 +314,6 @@
 def : Pat<(rotl GPR:$rs1, GPR:$rs2),
           (OR (SLL GPR:$rs1, GPR:$rs2), (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
 
-//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
-//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
-def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
-def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
diff --git a/llvm/test/CodeGen/RISCV/bitextract-mac.ll b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADMAC
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEAD
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADMAC
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEAD
+
+define i32 @f(i32 %A, i32 %B, i32 %C) {
+; RV32I-LABEL: f:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    mul a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 26
+; RV32I-NEXT:    srli a1, a1, 28
+; RV32I-NEXT:    slli a0, a0, 20
+; RV32I-NEXT:    srli a0, a0, 25
+; RV32I-NEXT:    mul a0, a1, a0
+; RV32I-NEXT:    add a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: f:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    mul a0, a1, a0
+; RV32ZBB-NEXT:    slli a1, a0, 26
+; RV32ZBB-NEXT:    srli a1, a1, 28
+; RV32ZBB-NEXT:    slli a0, a0, 20
+; RV32ZBB-NEXT:    srli a0, a0, 25
+; RV32ZBB-NEXT:    mul a0, a1, a0
+; RV32ZBB-NEXT:    add a0, a0, a2
+; RV32ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: f:
+; RV32XTHEADBB:       # %bb.0: # %entry
+; RV32XTHEADBB-NEXT:    mul a0, a1, a0
+; RV32XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
+; RV32XTHEADBB-NEXT:    mul a0, a1, a0
+; RV32XTHEADBB-NEXT:    add a0, a0, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV32XTHEADMAC-LABEL: f:
+; RV32XTHEADMAC:       # %bb.0: # %entry
+; RV32XTHEADMAC-NEXT:    mul a1, a1, a0
+; RV32XTHEADMAC-NEXT:    slli a0, a1, 26
+; RV32XTHEADMAC-NEXT:    srli a0, a0, 28
+; RV32XTHEADMAC-NEXT:    slli a1, a1, 20
+; RV32XTHEADMAC-NEXT:    srli a1, a1, 25
+; RV32XTHEADMAC-NEXT:    th.mulah a0, a2, a1
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV32XTHEAD-LABEL: f:
+; RV32XTHEAD:       # %bb.0: # %entry
+; RV32XTHEAD-NEXT:    mul a1, a1, a0
+; RV32XTHEAD-NEXT:    th.extu a0, a1, 5, 2
+; RV32XTHEAD-NEXT:    th.extu a1, a1, 11, 5
+; RV32XTHEAD-NEXT:    th.mulah a0, a2, a1
+; RV32XTHEAD-NEXT:    ret
+;
+; RV64I-LABEL: f:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    mulw a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 58
+; RV64I-NEXT:    srli a1, a1, 60
+; RV64I-NEXT:    slli a0, a0, 52
+; RV64I-NEXT:    srli a0, a0, 57
+; RV64I-NEXT:    mulw a0, a1, a0
+; RV64I-NEXT:    addw a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: f:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    mulw a0, a1, a0
+; RV64ZBB-NEXT:    slli a1, a0, 58
+; RV64ZBB-NEXT:    srli a1, a1, 60
+; RV64ZBB-NEXT:    slli a0, a0, 52
+; RV64ZBB-NEXT:    srli a0, a0, 57
+; RV64ZBB-NEXT:    mulw a0, a1, a0
+; RV64ZBB-NEXT:    addw a0, a0, a2
+; RV64ZBB-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: f:
+; RV64XTHEADMAC:       # %bb.0: # %entry
+; RV64XTHEADMAC-NEXT:    mulw a1, a1, a0
+; RV64XTHEADMAC-NEXT:    slli a0, a1, 58
+; RV64XTHEADMAC-NEXT:    srli a0, a0, 60
+; RV64XTHEADMAC-NEXT:    slli a1, a1, 52
+; RV64XTHEADMAC-NEXT:    srli a1, a1, 57
+; RV64XTHEADMAC-NEXT:    th.mulah a0, a2, a1
+; RV64XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: f:
+; RV64XTHEADBB:       # %bb.0: # %entry
+; RV64XTHEADBB-NEXT:    mul a0, a1, a0
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
+; RV64XTHEADBB-NEXT:    mulw a0, a1, a0
+; RV64XTHEADBB-NEXT:    addw a0, a0, a2
+; RV64XTHEADBB-NEXT:    ret
+;
+; RV64XTHEAD-LABEL: f:
+; RV64XTHEAD:       # %bb.0: # %entry
+; RV64XTHEAD-NEXT:    mul a1, a1, a0
+; RV64XTHEAD-NEXT:    th.extu a0, a1, 5, 2
+; RV64XTHEAD-NEXT:    th.extu a1, a1, 11, 5
+; RV64XTHEAD-NEXT:    th.mulah a0, a2, a1
+; RV64XTHEAD-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %B, %A
+  %0 = lshr i32 %mul, 2
+  %and = and i32 %0, 15
+  %1 = lshr i32 %mul, 5
+  %and2 = and i32 %1, 127
+  %mul3 = mul nuw nsw i32 %and, %and2
+  %add = add i32 %mul3, %C
+  ret i32 %add
+}
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -1623,8 +1623,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a7, a0
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB17_2
 ; RV32XTHEADBB-NEXT:  # %bb.1:
@@ -2098,8 +2097,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a6, a1
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_2
 ; RV32XTHEADBB-NEXT:  # %bb.1:
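
For reference, a minimal sketch (not part of the patch; function name and shift amounts are illustrative) of IR that should exercise the new signed path, trySignedBitfieldExtract. Following the (sra (shl X, C1) C2) comment above, Msb = XLen - C1 - 1 and Lsb = C2 - C1; the same result is expected if the combiner has already turned the shifts into (sra (sext_inreg X, i16), 4). Invocation would mirror the RUN lines above, e.g. llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs.

; Illustrative only: with C1 = 48 and C2 = 52 on RV64, Msb = 64 - 48 - 1 = 15
; and Lsb = 52 - 48 = 4, so XTHeadBb is expected to select
;   th.ext a0, a0, 15, 4
; in place of an slli/srai pair.
define i64 @sext_bitfield(i64 %x) {
entry:
  %shl = shl i64 %x, 48
  %shr = ashr i64 %shl, 52
  ret i64 %shr
}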