diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3774,12 +3774,23 @@
   // Don't tail-fold for tight loops where we would be better off interleaving
   // with an unpredicated loop.
   unsigned NumInsns = 0;
+  unsigned NumComparisons = 0;
   for (BasicBlock *BB : TFI->LVL->getLoop()->blocks()) {
     NumInsns += BB->sizeWithoutDebug();
+    NumComparisons += count_if(
+        *BB, [](Instruction &I) { return isa<ICmpInst>(&I); });
   }
 
   // We expect 4 of these to be a IV PHI, IV add, IV compare and branch.
-  return NumInsns >= SVETailFoldInsnThreshold;
+  // If there is more than one comparison in the loop, increase the required
+  // number of instructions for predicated tail folding. This is because the
+  // throughput of comparison and `whileXX` instructions is only one, and
+  // insufficient computation between comparisons can slow down the code.
+  //
+  // Scale by at least one: a loop in which no comparison was counted must
+  // still meet the base threshold, otherwise the product would be zero and
+  // every such loop would be tail-folded regardless of its size.
+  return NumInsns >= SVETailFoldInsnThreshold * std::max(NumComparisons, 1u);
 }
 
 InstructionCost