Index: lib/Transforms/Scalar/MergeICmps.cpp
===================================================================
--- lib/Transforms/Scalar/MergeICmps.cpp
+++ lib/Transforms/Scalar/MergeICmps.cpp
@@ -24,6 +24,8 @@
 
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -41,7 +43,7 @@
 
 #define DEBUG_TYPE "mergeicmps"
 
-#define MERGEICMPS_DOT_ON
+// #define MERGEICMPS_DOT_ON
 
 // A BCE atom.
 struct BCEAtom {
@@ -605,22 +607,33 @@
   bool runOnFunction(Function &F) override {
     if (skipFunction(F)) return false;
     const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-    auto PA = runImpl(F, &TLI);
+    const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    auto PA = runImpl(F, &TLI, &TTI);
     return !PA.areAllPreserved();
   }
 
  private:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 
-  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI);
+  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+                            const TargetTransformInfo *TTI);
 };
 
 PreservedAnalyses MergeICmps::runImpl(Function &F,
-                                      const TargetLibraryInfo *TLI) {
+                                      const TargetLibraryInfo *TLI,
+                                      const TargetTransformInfo *TTI) {
   DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
 
+  // We only try merging comparisons if the target wants to expand memcmp later.
+  // The rationale is to avoid turning small chains into memcmp calls.
+  unsigned MaxLoadSize;
+  if (!TTI->enableMemCmpExpansion(MaxLoadSize)) {
+    return PreservedAnalyses::all();
+  }
+
   bool MadeChange = false;
 
   for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
@@ -640,6 +653,7 @@
 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
                       "Merge contiguous icmps into a memcmp", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
                     "Merge contiguous icmps into a memcmp", false, false)
 
Index: test/Transforms/MergeICmps/pair-int32-int32.ll
===================================================================
--- test/Transforms/MergeICmps/pair-int32-int32.ll
+++ test/Transforms/MergeICmps/pair-int32-int32.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s --check-prefix=X86
+; RUN: opt -mergeicmps -S -o - %s | FileCheck %s --check-prefix=NOEXPANSION
 
 %"struct.std::pair" = type { i32, i32 }
 
@@ -26,20 +27,21 @@
   ret i1 %4
-; CHECK-LABEL: @opeq1(
+; X86-LABEL: @opeq1(
 ; The entry block with zero-offset GEPs is kept, loads are removed.
-; CHECK: entry
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
+; X86: entry
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
+; X86: @memcmp({{.*}}8)
+; X86: icmp eq {{.*}} 0
 ; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
+; X86: br label %opeq1.exit
+; X86-NOT: br
 ; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %entry ]
-; CHECK-NEXT: ret
+; X86: phi i1 [ %{{[^,]*}}, %entry ]
+; X86-NEXT: ret
+; NOEXPANSION-NOT: @memcmp({{.*}}8)
 }
 
 ; Same as above, but the two blocks are in inverse order.
@@ -68,19 +70,20 @@
-; CHECK-LABEL: @opeq1_inverse(
+; X86-LABEL: @opeq1_inverse(
 ; The second block with zero-offset GEPs is kept, loads are removed.
-; CHECK: land.rhs.i
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
+; X86: land.rhs.i
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
+; X86: @memcmp({{.*}}8)
+; X86: icmp eq {{.*}} 0
 ; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
+; X86: br label %opeq1.exit
+; X86-NOT: br
 ; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
-; CHECK-NEXT: ret
+; X86: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
+; X86-NEXT: ret
+; NOEXPANSION-NOT: @memcmp({{.*}}8)
 }
 
 
Index: test/Transforms/MergeICmps/tuple-four-int8.ll
===================================================================
--- test/Transforms/MergeICmps/tuple-four-int8.ll
+++ test/Transforms/MergeICmps/tuple-four-int8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s
 
 ; This is a more involved test: clang generates this weird pattern for
 ; tuple. Right now we skip the entry block
Index: test/Transforms/MergeICmps/volatile.ll
===================================================================
--- test/Transforms/MergeICmps/volatile.ll
+++ test/Transforms/MergeICmps/volatile.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s
 
 %"struct.std::pair" = type { i32, i32 }
 