diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -51,6 +51,7 @@
   bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool tryShrinkShlLogicImm(SDNode *Node);
+  bool trySignedBitfieldExtract(SDNode *Node);
 
   bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
   bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -661,6 +661,68 @@
   return true;
 }
 
+bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
+  // Only supported with XTHeadBb at the moment.
+  if (!Subtarget->hasVendorXTHeadBb())
+    return false;
+
+  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+  if (!N1C)
+    return false;
+
+  SDValue N0 = Node->getOperand(0);
+  if (!N0.hasOneUse())
+    return false;
+
+  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
+                             MVT VT) {
+    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
+                                  CurDAG->getTargetConstant(Msb, DL, VT),
+                                  CurDAG->getTargetConstant(Lsb, DL, VT));
+  };
+
+  SDLoc DL(Node);
+  MVT VT = Node->getSimpleValueType(0);
+  const unsigned RightShAmt = N1C->getZExtValue();
+
+  // Transform (sra (shl X, C1) C2) ->
+  //           (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SHL) {
+    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (!N01C)
+      return false;
+
+    const unsigned LeftShAmt = N01C->getZExtValue();
+    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
+    const unsigned Msb = MsbPlusOne - 1;
+    const unsigned Lsb = RightShAmt - LeftShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  // Transform (sra (sext_inreg X, _), C) ->
+  //           (TH.EXT X, msb, lsb)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    unsigned ExtSize =
+        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+
+    // ExtSize of 32 should use sraiw via tablegen pattern.
+    if (ExtSize == 32)
+      return false;
+
+    const unsigned Msb = ExtSize - 1;
+    const unsigned Lsb = RightShAmt;
+
+    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+    ReplaceNode(Node, TH_EXT);
+    return true;
+  }
+
+  return false;
+}
+
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
   if (Node->isMachineOpcode()) {
@@ -846,6 +908,9 @@
     return;
   }
   case ISD::SRA: {
+    if (trySignedBitfieldExtract(Node))
+      return;
+
     // Optimize (sra (sext_inreg X, i16), C) ->
     //          (srai (slli X, (XLen-16), (XLen-16) + C)
     // And      (sra (sext_inreg X, i8), C) ->
@@ -923,6 +988,17 @@
     // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
     // with c3 leading zeros.
     if (!LeftShift && isMask_64(C1)) {
+      // Use a th.extu instruction
+      if (Subtarget->hasVendorXTHeadBb()) {
+        unsigned Msb = llvm::bit_width(C1) + C2 - 1;
+        unsigned Lsb = C2;
+        SDNode *TH_EXTU = CurDAG->getMachineNode(
+            RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
+            CurDAG->getTargetConstant(Lsb, DL, VT));
+        ReplaceNode(Node, TH_EXTU);
+        return;
+      }
+
       unsigned Leading = XLen - llvm::bit_width(C1);
       if (C2 < Leading) {
         // If the number of leading zeros is C2+32 this can be SRLIW.
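The Msb/Lsb arithmetic in trySignedBitfieldExtract() is the standard bitfield-extract identity: XTheadBb's th.ext rd, rs1, msb, lsb sign-extends bits [msb:lsb] of rs1, so on an XLen-bit type (sra (shl X, C1), C2) corresponds to msb = XLen - C1 - 1 and lsb = C2 - C1. A minimal standalone sketch of that equivalence, assuming RV64; th_ext_ref is a made-up reference model, not part of the patch:

#include <cassert>
#include <cstdint>

// Reference model of `th.ext rd, rs1, msb, lsb` (RV64): sign-extend
// bits [msb:lsb] of rs1 into rd.
static int64_t th_ext_ref(int64_t rs1, unsigned msb, unsigned lsb) {
  const unsigned xlen = 64;
  // Shift bit `msb` up into the sign position, then arithmetic-shift
  // back down so the field is sign-extended from its top bit.
  return (int64_t)((uint64_t)rs1 << (xlen - 1 - msb)) >> (xlen - 1 - msb + lsb);
}

int main() {
  // trySignedBitfieldExtract maps (sra (shl X, 20), 25) on RV64 to
  // TH.EXT X, 43, 5: Msb = 64 - 20 - 1 = 43, Lsb = 25 - 20 = 5.
  int64_t x = 0x0123456789abcdef;
  assert(th_ext_ref(x, 43, 5) == ((int64_t)((uint64_t)x << 20) >> 25));
  return 0;
}

The hasOneUse() guard keeps the transform from firing when the inner shl or sext_inreg has other users, in which case the original shift would have to be materialized anyway.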
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -314,10 +314,6 @@
 def : Pat<(rotl GPR:$rs1, GPR:$rs2),
           (OR (SLL GPR:$rs1, GPR:$rs2), (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
-//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
-//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
-def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
-def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
 def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
diff --git a/llvm/test/CodeGen/RISCV/bitextract-mac.ll b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bitextract-mac.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADMAC
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEAD
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADMAC
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEAD
+
+define i32 @f(i32 %A, i32 %B, i32 %C) {
+; RV32I-LABEL: f:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    mul a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 26
+; RV32I-NEXT:    srli a1, a1, 28
+; RV32I-NEXT:    slli a0, a0, 20
+; RV32I-NEXT:    srli a0, a0, 25
+; RV32I-NEXT:    mul a0, a1, a0
+; RV32I-NEXT:    add a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: f:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    mul a0, a1, a0
+; RV32ZBB-NEXT:    slli a1, a0, 26
+; RV32ZBB-NEXT:    srli a1, a1, 28
+; RV32ZBB-NEXT:    slli a0, a0, 20
+; RV32ZBB-NEXT:    srli a0, a0, 25
+; RV32ZBB-NEXT:    mul a0, a1, a0
+; RV32ZBB-NEXT:    add a0, a0, a2
+; RV32ZBB-NEXT:    ret
+;
+; RV32XTHEADBB-LABEL: f:
+; RV32XTHEADBB:       # %bb.0: # %entry
+; RV32XTHEADBB-NEXT:    mul a0, a1, a0
+; RV32XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV32XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
+; RV32XTHEADBB-NEXT:    mul a0, a1, a0
+; RV32XTHEADBB-NEXT:    add a0, a0, a2
+; RV32XTHEADBB-NEXT:    ret
+;
+; RV32XTHEADMAC-LABEL: f:
+; RV32XTHEADMAC:       # %bb.0: # %entry
+; RV32XTHEADMAC-NEXT:    mul a1, a1, a0
+; RV32XTHEADMAC-NEXT:    slli a0, a1, 26
+; RV32XTHEADMAC-NEXT:    srli a0, a0, 28
+; RV32XTHEADMAC-NEXT:    slli a1, a1, 20
+; RV32XTHEADMAC-NEXT:    srli a1, a1, 25
+; RV32XTHEADMAC-NEXT:    th.mulah a0, a2, a1
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV32XTHEAD-LABEL: f:
+; RV32XTHEAD:       # %bb.0: # %entry
+; RV32XTHEAD-NEXT:    mul a1, a1, a0
+; RV32XTHEAD-NEXT:    th.extu a0, a1, 5, 2
+; RV32XTHEAD-NEXT:    th.extu a1, a1, 11, 5
+; RV32XTHEAD-NEXT:    th.mulah a0, a2, a1
+; RV32XTHEAD-NEXT:    ret
+;
+; RV64I-LABEL: f:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    mulw a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 58
+; RV64I-NEXT:    srli a1, a1, 60
+; RV64I-NEXT:    slli a0, a0, 52
+; RV64I-NEXT:    srli a0, a0, 57
+; RV64I-NEXT:    mulw a0, a1, a0
+; RV64I-NEXT:    addw a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: f:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    mulw a0, a1, a0
+; RV64ZBB-NEXT:    slli a1, a0, 58
+; RV64ZBB-NEXT:    srli a1, a1, 60
+; RV64ZBB-NEXT:    slli a0, a0, 52
+; RV64ZBB-NEXT:    srli a0, a0, 57
+; RV64ZBB-NEXT:    mulw a0, a1, a0
+; RV64ZBB-NEXT:    addw a0, a0, a2
+; RV64ZBB-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: f:
+; RV64XTHEADMAC:       # %bb.0: # %entry
+; RV64XTHEADMAC-NEXT:    mulw a1, a1, a0
+; RV64XTHEADMAC-NEXT:    slli a0, a1, 58
+; RV64XTHEADMAC-NEXT:    srli a0, a0, 60
+; RV64XTHEADMAC-NEXT:    slli a1, a1, 52
+; RV64XTHEADMAC-NEXT:    srli a1, a1, 57
+; RV64XTHEADMAC-NEXT:    th.mulah a0, a2, a1
+; RV64XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADBB-LABEL: f:
+; RV64XTHEADBB:       # %bb.0: # %entry
+; RV64XTHEADBB-NEXT:    mul a0, a1, a0
+; RV64XTHEADBB-NEXT:    th.extu a1, a0, 5, 2
+; RV64XTHEADBB-NEXT:    th.extu a0, a0, 11, 5
+; RV64XTHEADBB-NEXT:    mulw a0, a1, a0
+; RV64XTHEADBB-NEXT:    addw a0, a0, a2
+; RV64XTHEADBB-NEXT:    ret
+;
+; RV64XTHEAD-LABEL: f:
+; RV64XTHEAD:       # %bb.0: # %entry
+; RV64XTHEAD-NEXT:    mul a1, a1, a0
+; RV64XTHEAD-NEXT:    th.extu a0, a1, 5, 2
+; RV64XTHEAD-NEXT:    th.extu a1, a1, 11, 5
+; RV64XTHEAD-NEXT:    th.mulah a0, a2, a1
+; RV64XTHEAD-NEXT:    ret
+entry:
+  %mul = mul nsw i32 %B, %A
+  %0 = lshr i32 %mul, 2
+  %and = and i32 %0, 15
+  %1 = lshr i32 %mul, 5
+  %and2 = and i32 %1, 127
+  %mul3 = mul nuw nsw i32 %and, %and2
+  %add = add i32 %mul3, %C
+  ret i32 %add
+}
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1140,14 +1140,16 @@
 ;
 ; RV32XTHEADBB-LABEL: test_ctlz_i16:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
+; RV32XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    addi a0, a0, -16
 ; RV32XTHEADBB-NEXT:    ret
 ;
 ; RV64XTHEADBB-LABEL: test_ctlz_i16:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
+; RV64XTHEADBB-NEXT:    srli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV64XTHEADBB-NEXT:    addi a0, a0, -48
 ; RV64XTHEADBB-NEXT:    ret
@@ -1797,14 +1799,16 @@
 ;
 ; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
+; RV32XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    addi a0, a0, -16
 ; RV32XTHEADBB-NEXT:    ret
 ;
 ; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
+; RV64XTHEADBB-NEXT:    srli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV64XTHEADBB-NEXT:    addi a0, a0, -48
 ; RV64XTHEADBB-NEXT:    ret
@@ -2966,7 +2970,8 @@
 ;
 ; RV32XTHEADBB-LABEL: test_parity_i16:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
+; RV32XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    srli a1, a0, 8
 ; RV32XTHEADBB-NEXT:    xor a0, a0, a1
 ; RV32XTHEADBB-NEXT:    srli a1, a0, 4
@@ -2980,7 +2985,8 @@
 ;
 ; RV64XTHEADBB-LABEL: test_parity_i16:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
+; RV64XTHEADBB-NEXT:    srli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    srli a1, a0, 8
 ; RV64XTHEADBB-NEXT:    xor a0, a0, a1
 ; RV64XTHEADBB-NEXT:    srli a1, a0, 4
@@ -3058,7 +3064,8 @@
 ;
 ; RV64XTHEADBB-LABEL: test_parity_i32:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT:    slli a1, a0, 32
+; RV64XTHEADBB-NEXT:    srli a1, a1, 32
 ; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
 ; RV64XTHEADBB-NEXT:    xor a0, a1, a0
 ; RV64XTHEADBB-NEXT:    srli a1, a0, 8
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -1623,8 +1623,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a7, a0
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB17_2
 ; RV32XTHEADBB-NEXT:  # %bb.1:
@@ -2098,8 +2097,7 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    slli a5, a4, 26
-; RV32XTHEADBB-NEXT:    srli a5, a5, 31
+; RV32XTHEADBB-NEXT:    th.extu a5, a4, 5, 5
 ; RV32XTHEADBB-NEXT:    mv a6, a1
 ; RV32XTHEADBB-NEXT:    bnez a5, .LBB21_2
 ; RV32XTHEADBB-NEXT:  # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -365,7 +365,8 @@
 ;
 ; RV32XTHEADBB-LABEL: zexth_i32:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
+; RV32XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    ret
   %and = and i32 %a, 65535
   ret i32 %and
@@ -381,7 +382,8 @@
 ;
 ; RV32XTHEADBB-LABEL: zexth_i64:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
+; RV32XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    li a1, 0
 ; RV32XTHEADBB-NEXT:    ret
   %and = and i64 %a, 65535
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -652,7 +652,8 @@
 ;
 ; RV64XTHEADBB-LABEL: zexth_i32:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
+; RV64XTHEADBB-NEXT:    srli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    ret
   %and = and i32 %a, 65535
   ret i32 %and
@@ -667,7 +668,8 @@
 ;
 ; RV64XTHEADBB-LABEL: zexth_i64:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
+; RV64XTHEADBB-NEXT:    srli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    ret
   %and = and i64 %a, 65535
   ret i64 %and
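The churn in the zexth/ctlz/parity checks above is the flip side of the removed TH_EXTU patterns: a bare and-with-mask no longer selects th.extu and falls back to the generic slli/srli pair, while (and (srl x, c2), mask) now goes through the new ISD::AND path and still produces th.extu (see the rotl-rotr.ll and bitextract-mac.ll checks). A matching sketch of the unsigned semantics, again assuming RV64; th_extu_ref is a made-up reference model, not part of the patch:

#include <cassert>
#include <cstdint>

// Reference model of `th.extu rd, rs1, msb, lsb` (RV64): zero-extend
// bits [msb:lsb] of rs1 into rd.
static uint64_t th_extu_ref(uint64_t rs1, unsigned msb, unsigned lsb) {
  const unsigned width = msb - lsb + 1;
  const uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
  return (rs1 >> lsb) & mask;
}

int main() {
  // The new ISD::AND handling maps (and (srl x, 2), 15) to TH.EXTU x, 5, 2:
  // Msb = bit_width(15) + C2 - 1 = 4 + 2 - 1 = 5, Lsb = C2 = 2,
  // matching the th.extu a1, a0, 5, 2 check in bitextract-mac.ll.
  uint64_t x = 0xdeadbeefcafef00dULL;
  assert(th_extu_ref(x, 5, 2) == ((x >> 2) & 15));
  return 0;
}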