Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -822,8 +822,8 @@ /// also combined within this function. Currently, the minimum size check is /// performed in findJumpTable() in SelectionDAGBuiler and /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. - bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, - uint64_t Range) const { + virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, + uint64_t Range) const { const bool OptForSize = SI->getParent()->getParent()->optForSize(); const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); const unsigned MaxJumpTableSize = @@ -1274,7 +1274,7 @@ } /// Return lower limit for number of blocks in a jump table. - unsigned getMinimumJumpTableEntries() const; + virtual unsigned getMinimumJumpTableEntries() const; /// Return lower limit of the density in a jump table. unsigned getMinimumJumpTableDensity(bool OptForSize) const; Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -560,6 +560,18 @@ bool useSoftFloat() const override; + // A jump table is emitted as lwax -> mtctr -> bctr. All three of those + // are high latency instructions, so we don't want to emit jump tables for + // switches with fewer than 7 cases as the jump table overhead will outweigh + // the benefit. + unsigned getMinimumJumpTableEntries() const override { + return 7; + } + bool mightUseCTR(const BasicBlock *BB, + const SwitchInst *SI = nullptr) const; + bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, + uint64_t Range) const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -113,6 +113,14 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); +STATISTIC(NumCallsToIsSuitableForJT, + "Number of times a switch is checked for JT eligibility"); +STATISTIC(NumSkippedAlreadyExaminedSwitches, + "Number of switches previously proven not eligible for JT"); +STATISTIC(NumSkippedAlreadyVisitedCases, + "Number of cases not examined for CTR use due to previous visits"); +STATISTIC(NumSwitchesNotSuitableForJT, + "Number of switch instructions not suitable for jump tables"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); @@ -1127,6 +1135,85 @@ return Subtarget.useSoftFloat(); } +/// Does the basic block use the count register (CTR)? This will happen if the +/// block has a CTR instrinsic, an indirect call or a switch instruction. +/// The \p SI parameter is used to avoid reporting that a block that contains +/// the SwitchInst we're inspecting uses the CTR. +bool PPCTargetLowering::mightUseCTR(const BasicBlock *BB, + const SwitchInst *SI) const { + for (auto &J : *BB) { + if (const CallInst *CI = dyn_cast(&J)) { + if (Function *F = CI->getCalledFunction()) { + if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::ppc_is_decremented_ctr_nonzero: + case Intrinsic::ppc_mtctr: + return true; + } + } + } else // Indirect calls are uses of the CTR. + return true; + } else if (const SwitchInst *CI = dyn_cast(&J)) { + // Ignore the switch instruction we're actually examining. + if (SI == CI) + continue; + // Does the block have a switch instruction large enough to actually + // be lowered to a jump table? + if ((CI->getNumCases() >= getMinimumJumpTableEntries())) + return true; + } + } + return false; +} + +// This function is called a huge number of times so we try to limit the number +// of traversals of all basic blocks. Just as an example, a bootstrap build +// calls this function 11,822,805 times on 7554 unique SwitchInst instances. We +// conservatively re-check any SwitchInst's that were previously suitable for a +// jump table to ensure that some transformation hasn't made it unsuitable. +bool PPCTargetLowering::isSuitableForJumpTable(const SwitchInst *SI, + uint64_t NumCases, + uint64_t Range) const { + static std::set KnownNoJTSwitches; + std::set VisitedBlocks; + NumCallsToIsSuitableForJT++; + if (KnownNoJTSwitches.count(SI)) { + NumSkippedAlreadyExaminedSwitches++; + return false; + } + if (mightUseCTR(SI->getParent(), SI)) { + KnownNoJTSwitches.insert(SI); + DEBUG(dbgs() << "In function: " << SI->getParent()->getParent()->getName()); + DEBUG(dbgs() << "\nThe switch instruction in following block isn't " + "suitable for a Jump Table:\n"); + DEBUG(SI->getParent()->dump()); + NumSwitchesNotSuitableForJT++; + return false; + } + for (auto It : SI->cases()) { + // Any number of cases can go to the same block. No need to traverse + // a basic block multiple times if we already know it doesn't use the CTR. + if (!VisitedBlocks.insert(It.getCaseSuccessor()).second) { + NumSkippedAlreadyVisitedCases++; + continue; + } + if (mightUseCTR(It.getCaseSuccessor())) { + NumSwitchesNotSuitableForJT++; + KnownNoJTSwitches.insert(SI); + DEBUG(dbgs() << "In function: " << + SI->getParent()->getParent()->getName()); + DEBUG(dbgs() << "\nSwitch instruction:\n"); + DEBUG(SI->dump()); + DEBUG(dbgs() << "Not suitable for a jump table due to possible CTR " + "use in successor:\n"); + DEBUG(It.getCaseSuccessor()->dump()); + return false; + } + } + return TargetLoweringBase::isSuitableForJumpTable(SI, NumCases, Range); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; Index: test/CodeGen/PowerPC/NoJTsDueToCTRUse.ll =================================================================== --- test/CodeGen/PowerPC/NoJTsDueToCTRUse.ll +++ test/CodeGen/PowerPC/NoJTsDueToCTRUse.ll @@ -0,0 +1,396 @@ +; RUN: llc -verify-machineinstrs -debug-only=ppc-lowering < %s > /dev/null 2>%t && FileCheck <%t %s +; REQUIRES: asserts +define signext i32 @case1(i32 signext %a, i32 signext %b, i32 signext %c) { +entry: +; CHECK: In function: case1 +; CHECK: The switch instruction in following block isn't suitable for a Jump Table: +; CHECK: entry: +; CHECK: call void @llvm.ppc.mtctr.i64(i64 2045) + call void @llvm.ppc.mtctr.i64(i64 2045) + switch i32 %b, label %sw.epilog [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb4 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + i32 21, label %sw.bb11 + i32 22, label %sw.bb14 + i32 23, label %sw.bb17 + i32 24, label %sw.bb20 + ] + +sw.bb: ; preds = %entry + br label %sw.epilog + +sw.bb1: ; preds = %entry + br label %sw.epilog + +sw.bb2: ; preds = %entry + br label %sw.epilog + +sw.bb4: ; preds = %entry + %add6 = add nsw i32 %a, 15 + br label %sw.epilog + +sw.bb7: ; preds = %entry + %0 = mul i32 %a, -89 + %sub8 = add i32 %0, 5 + br label %sw.epilog + +sw.bb9: ; preds = %entry + %div = sdiv i32 89, %a + %sub10 = sub nsw i32 6, %div + br label %sw.epilog + +sw.bb11: ; preds = %entry + %div12 = sdiv i32 89, %a + %add13 = add nsw i32 %div12, 21 + br label %sw.epilog + +sw.bb14: ; preds = %entry + %mul15 = mul nsw i32 %a, 89 + %add16 = add nsw i32 %mul15, 22 + br label %sw.epilog + +sw.bb17: ; preds = %entry + %sub18 = sub i32 89, %a + %add19 = add nsw i32 %sub18, 23 + br label %sw.epilog + +sw.bb20: ; preds = %entry + %1 = mul i32 %a, -11 + %add23 = add i32 %1, 113 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb20, %sw.bb17, %sw.bb14, %sw.bb11, %sw.bb9, %sw.bb7, %sw.bb4, %sw.bb2, %sw.bb1, %sw.bb + %b.addr.0 = phi i32 [ %b, %entry ], [ %add23, %sw.bb20 ], [ %add19, %sw.bb17 ], [ %add16, %sw.bb14 ], [ %add13, %sw.bb11 ], [ %sub10, %sw.bb9 ], [ %sub8, %sw.bb7 ], [ %add6, %sw.bb4 ], [ 703, %sw.bb2 ], [ -85, %sw.bb1 ], [ 8, %sw.bb ] + %add24 = add nsw i32 %b.addr.0, %a + ret i32 %add24 +} + +declare void @llvm.ppc.mtctr.i64(i64) + +define fastcc void @case2() { +; CHECK: In function: case2 +; CHECK: Switch instruction: +; CHECK: switch i32 %0, label %sw.default [ +; CHECK: Not suitable for a jump table due to possible CTR use in successor: +; CHEC: sw.bb20: +entry: + switch i16 undef, label %sw.default [ + i16 54, label %sw.bb + i16 55, label %sw.bb3 + i16 57, label %sw.bb14 + i16 68, label %sw.bb20 + i16 67, label %sw.bb27 + i16 58, label %sw.bb36 + i16 134, label %sw.bb45 + i16 61, label %sw.bb54 + i16 63, label %sw.bb82 + i16 62, label %while.body.i486 + i16 69, label %while.body.i499 + i16 59, label %while.body.i512 + i16 66, label %sw.bb102 + ] + +sw.bb: ; preds = %entry + unreachable + +sw.bb3: ; preds = %entry + unreachable + +sw.bb14: ; preds = %entry + unreachable + +sw.bb20: ; preds = %entry + br label %land.rhs.i258 + +land.rhs.i258: ; preds = %while.body.i262, %sw.bb20 + %cur.addr.019.i255.idx = phi i64 [ 1, %sw.bb20 ], [ %cur.addr.019.i255.add, %while.body.i262 ] + br label %while.body.i262 + +while.body.i262: ; preds = %land.rhs.i258 + %cur.addr.019.i255.add = add nuw nsw i64 %cur.addr.019.i255.idx, 1 + %cmp2.i261 = icmp ult i64 %cur.addr.019.i255.idx, 2045 + br i1 %cmp2.i261, label %land.rhs.i258, label %safe_concat.exit264 + +safe_concat.exit264: ; preds = %while.body.i262 + unreachable + +sw.bb27: ; preds = %entry + unreachable + +sw.bb36: ; preds = %entry + unreachable + +sw.bb45: ; preds = %entry + unreachable + +sw.bb54: ; preds = %entry + unreachable + +sw.bb82: ; preds = %entry + unreachable + +while.body.i486: ; preds = %entry + unreachable + +while.body.i499: ; preds = %entry + unreachable + +while.body.i512: ; preds = %entry + ret void + +sw.bb102: ; preds = %entry + unreachable + +sw.default: ; preds = %entry + unreachable +} + +define signext i32 @case3(i32 signext %a, i32 (i32)* nocapture %Fptr) { +; CHECK: In function: case3 +; CHECK: The switch instruction in following block isn't suitable for a Jump Table: +; CHECK: entry: +; CHECK: %call = tail call signext i32 %Fptr(i32 signext %a) +entry: + %call = tail call signext i32 %Fptr(i32 signext %a) #1 + switch i32 %call, label %sw.epilog [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb4 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + i32 21, label %sw.bb11 + i32 22, label %sw.bb14 + i32 23, label %sw.bb17 + i32 24, label %sw.bb20 + ] + +sw.bb: ; preds = %entry + br label %sw.epilog + +sw.bb1: ; preds = %entry + br label %sw.epilog + +sw.bb2: ; preds = %entry + br label %sw.epilog + +sw.bb4: ; preds = %entry + %add6 = add nsw i32 %a, 15 + br label %sw.epilog + +sw.bb7: ; preds = %entry + %0 = mul i32 %a, -89 + %sub8 = add i32 %0, 5 + br label %sw.epilog + +sw.bb9: ; preds = %entry + %div = sdiv i32 89, %a + %sub10 = sub nsw i32 6, %div + br label %sw.epilog + +sw.bb11: ; preds = %entry + %div12 = sdiv i32 89, %a + %add13 = add nsw i32 %div12, 21 + br label %sw.epilog + +sw.bb14: ; preds = %entry + %mul15 = mul nsw i32 %a, 89 + %add16 = add nsw i32 %mul15, 22 + br label %sw.epilog + +sw.bb17: ; preds = %entry + %sub18 = sub i32 89, %a + %add19 = add nsw i32 %sub18, 23 + br label %sw.epilog + +sw.bb20: ; preds = %entry + %1 = mul i32 %a, -11 + %add23 = add i32 %1, 113 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb20, %sw.bb17, %sw.bb14, %sw.bb11, %sw.bb9, %sw.bb7, %sw.bb4, %sw.bb2, %sw.bb1, %sw.bb + %b.0 = phi i32 [ %call, %entry ], [ %add23, %sw.bb20 ], [ %add19, %sw.bb17 ], [ %add16, %sw.bb14 ], [ %add13, %sw.bb11 ], [ %sub10, %sw.bb9 ], [ %sub8, %sw.bb7 ], [ %add6, %sw.bb4 ], [ 703, %sw.bb2 ], [ -85, %sw.bb1 ], [ 8, %sw.bb ] + %add24 = add nsw i32 %b.0, %a + ret i32 %add24 +} + +define void @case4() { +; CHECK: In function: case4 +; CHECK: Switch instruction: +; CHECK: switch i32 undef, label %sw.default660 [ +; CHECK: Not suitable for a jump table due to possible CTR use in successor: +; CHECK: sw.bb466: +; CHECK: tail call void undef(i8* undef) +entry: + switch i32 undef, label %sw.default660 [ + i32 0, label %sw.bb + i32 20, label %sw.bb458 + i32 19, label %sw.bb444 + i32 15, label %sw.bb361 + i32 29, label %sw.bb44 + i32 13, label %sw.bb53 + i32 17, label %sw.bb377 + i32 16, label %sw.bb372 + i32 25, label %sw.bb535 + i32 24, label %sw.bb484 + i32 36, label %sw.bb366 + i32 14, label %sw.bb315 + i32 12, label %sw.bb310 + i32 30, label %sw.bb466 + ] + +sw.bb: ; preds = %entry + unreachable + +sw.bb44: ; preds = %entry + unreachable + +sw.bb53: ; preds = %entry + unreachable + +sw.bb310: ; preds = %entry + unreachable + +sw.bb315: ; preds = %entry + unreachable + +sw.bb361: ; preds = %entry + unreachable + +sw.bb366: ; preds = %entry + unreachable + +sw.bb372: ; preds = %entry + unreachable + +sw.bb377: ; preds = %entry + unreachable + +sw.bb444: ; preds = %entry + unreachable + +sw.bb458: ; preds = %entry + unreachable + +sw.bb466: ; preds = %entry + tail call void undef(i8* undef) #1 + unreachable + +sw.bb484: ; preds = %entry + unreachable + +sw.bb535: ; preds = %entry + unreachable + +sw.default660: ; preds = %entry + unreachable +} + +define void @case5(i1 %in) { +; CHECK: In function: case5 +; CHECK: Switch instruction: +; CHECK: switch i32 %0, label %sw.default [ +; CHECK: Not suitable for a jump table due to possible CTR use in successor: +; CHECK: sw.bb189: ; preds = %if.end, %if.end, %if.end, %if.end +; CHECK: switch i32 %1, label %sw.default.i [ +entry: + br i1 %in, label %cleanup, label %if.end + +if.end: ; preds = %entry + switch i8 undef, label %sw.default [ + i8 0, label %undef_sstr + i8 1, label %sw.bb31 + i8 2, label %sw.bb80 + i8 3, label %sw.bb124 + i8 4, label %sw.bb171 + i8 14, label %sw.bb171 + i8 5, label %sw.bb177 + i8 6, label %sw.bb183 + i8 10, label %sw.bb189 + i8 11, label %sw.bb189 + i8 12, label %sw.bb189 + i8 15, label %sw.bb189 + i8 13, label %sw.bb201 + ] + +undef_sstr: ; preds = %sw.bb31, %if.end + unreachable + +sw.bb31: ; preds = %if.end + br label %undef_sstr + +sw.bb80: ; preds = %if.end + unreachable + +sw.bb124: ; preds = %if.end + unreachable + +sw.bb171: ; preds = %if.end, %if.end + unreachable + +sw.bb177: ; preds = %if.end + unreachable + +sw.bb183: ; preds = %if.end + unreachable + +sw.bb189: ; preds = %if.end, %if.end, %if.end, %if.end + switch i8 undef, label %sw.default.i [ + i8 0, label %sw.bb.i + i8 1, label %sw.bb.i + i8 2, label %sw.bb.i + i8 3, label %sw.bb.i + i8 4, label %sw.bb.i + i8 5, label %sw.bb.i + i8 6, label %sw.bb.i + i8 7, label %sw.bb.i + i8 8, label %sw.bb.i + i8 9, label %sw.bb11.i + i8 10, label %Perl_sv_reftype.exit + i8 11, label %sw.bb28.i + i8 12, label %sw.bb29.i + i8 13, label %sw.bb30.i + i8 14, label %sw.bb31.i + i8 15, label %sw.bb32.i + ] + +sw.bb.i: ; preds = %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189, %sw.bb189 + unreachable + +sw.bb11.i: ; preds = %sw.bb189 + unreachable + +sw.bb28.i: ; preds = %sw.bb189 + unreachable + +sw.bb29.i: ; preds = %sw.bb189 + unreachable + +sw.bb30.i: ; preds = %sw.bb189 + unreachable + +sw.bb31.i: ; preds = %sw.bb189 + unreachable + +sw.bb32.i: ; preds = %sw.bb189 + unreachable + +sw.default.i: ; preds = %sw.bb189 + unreachable + +Perl_sv_reftype.exit: ; preds = %sw.bb189 + unreachable + +sw.bb201: ; preds = %if.end + unreachable + +sw.default: ; preds = %if.end + unreachable + +cleanup: ; preds = %entry + ret void +}