Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -4720,6 +4720,12 @@ /// \returns The expansion result or SDValue() if it fails. SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; + /// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. + /// \param N Node to expand + /// \returns Reference to table generated in Constant Pool. + SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, SDLoc dl, EVT VT, + SDValue Op, unsigned NumBitsPerElt) const; + /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7788,11 +7788,55 @@ return DAG.getNode(ISD::CTPOP, dl, VT, Op); } +SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, + SDLoc dl, EVT VT, SDValue Op, + unsigned NumBitsPerElt) const { + APInt DeBruijn(32, 0x077CB531U); + APInt BitWidth(32, NumBitsPerElt); + APInt ShiftAmt(32, (BitWidth - BitWidth.logBase2()).getZExtValue()); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Op); + SDValue Lookup = DAG.getNode( + ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Neg), + DAG.getConstant(DeBruijn.getZExtValue(), dl, VT)), + DAG.getConstant(ShiftAmt.getZExtValue(), dl, VT)); + + // Create a table in constant data pool + std::vector Elts; + uint8_t RshrArr[32]; + + for (unsigned int i = 0; i < NumBitsPerElt; i++) { + APInt Lshr = DeBruijn.rotl(i); + unsigned int Rshr = Lshr.getZExtValue() >> ShiftAmt.getZExtValue(); + RshrArr[Rshr] = i; + } + + for (unsigned int i = 0; i < NumBitsPerElt; i++) { + SDValue Index = DAG.getConstant(RshrArr[i], dl, VT); + ConstantSDNode *IndexNode = cast(Index); + ConstantInt *CI = + const_cast(IndexNode->getConstantIntValue()); + Elts.push_back(CI); + } + const DataLayout &TD = DAG.getDataLayout(); + + // Create a ConstantArray of size NumBitsPerElt + auto *CA = ConstantArray::get( + ArrayType::get(Elts[0]->getType(), NumBitsPerElt), Elts); + SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD), + TD.getPrefTypeAlign(Elts[0]->getType())); + Align Alignment = cast(CPIdx)->getAlign(); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), DAG.getMemBasePlusOffset(CPIdx, Lookup, dl), + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); +} + SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Op = Node->getOperand(0); unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && @@ -7821,6 +7865,11 @@ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. + if (NumBitsPerElt == 32 && !VT.isVector() && + TLI.isOperationExpand(ISD::CTPOP, VT) && !isOperationLegal(ISD::CTLZ, VT)) + return CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt); + // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } Index: llvm/test/CodeGen/SPARC/cttz.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SPARC/cttz.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=sparc -mcpu=v9 | FileCheck %s + +@f.table = internal unnamed_addr constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1 + +define i32 @f(i32 %x) { +; CHECK-LABEL: f: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: ! %entry +; CHECK-NEXT: mov %g0, %o1 +; CHECK-NEXT: sub %o1, %o0, %o1 +; CHECK-NEXT: and %o0, %o1, %o1 +; CHECK-NEXT: sethi 122669, %o2 +; CHECK-NEXT: or %o2, 305, %o2 +; CHECK-NEXT: smul %o1, %o2, %o1 +; CHECK-NEXT: srl %o1, 27, %o1 +; CHECK-NEXT: sethi %hi(.LCPI0_0), %o2 +; CHECK-NEXT: add %o2, %lo(.LCPI0_0), %o2 +; CHECK-NEXT: ld [%o2+%o1], %o1 +; CHECK-NEXT: cmp %o0, 0 +; CHECK-NEXT: move %icc, 0, %o1 +; CHECK-NEXT: retl +; CHECK-NEXT: mov %o1, %o0 +entry: + %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %1 = icmp eq i32 %x, 0 + %2 = select i1 %1, i32 0, i32 %0 + %3 = trunc i32 %2 to i8 + %conv = zext i8 %3 to i32 + ret i32 %conv +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.cttz.i32(i32, i1 immarg) #0 + +attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }