Index: lib/Transforms/Scalar/MergeICmps.cpp =================================================================== --- lib/Transforms/Scalar/MergeICmps.cpp +++ lib/Transforms/Scalar/MergeICmps.cpp @@ -28,6 +28,8 @@ #include #include "llvm/ADT/APSInt.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -41,8 +43,6 @@ #define DEBUG_TYPE "mergeicmps" -#define MERGEICMPS_DOT_ON - // A BCE atom. struct BCEAtom { BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {} @@ -589,22 +589,31 @@ bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - auto PA = runImpl(F, &TLI); + const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto PA = runImpl(F, &TLI, &TTI); return !PA.areAllPreserved(); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } - PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI); + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI); }; PreservedAnalyses MergeICmps::runImpl(Function &F, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI) { DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); + // We only try merging comparisons if the target wants to expand memcmp later. + // The rationale is to avoid turning small chains into memcmp calls. 
+ unsigned MaxLoadSize; + if(!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all(); + bool MadeChange = false; for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { @@ -623,6 +632,7 @@ INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) Index: test/Transforms/MergeICmps/X86/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/MergeICmps/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + Index: test/Transforms/MergeICmps/X86/pair-int32-int32.ll =================================================================== --- test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergeicmps -S -o - %s | FileCheck %s +; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s --check-prefix=X86 %"struct.std::pair" = type { i32, i32 } @@ -26,20 +26,20 @@ ret i1 %4 ; CHECK-LABEL: @opeq1( ; The entry block with zero-offset GEPs is kept, loads are removed. -; CHECK: entry -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load +; X86: entry +; X86: getelementptr {{.*}} i32 0 +; X86-NOT: load +; X86: getelementptr {{.*}} i32 0 +; X86-NOT: load ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. -; CHECK: @memcmp({{.*}}8) -; CHECK: icmp eq {{.*}} 0 +; X86: @memcmp({{.*}}8) +; X86: icmp eq {{.*}} 0 ; The branch is now a direct branch; the other block has been removed. -; CHECK: br label %opeq1.exit -; CHECK-NOT: br +; X86: br label %opeq1.exit +; X86-NOT: br ; The phi is updated. 
-; CHECK: phi i1 [ %{{[^,]*}}, %entry ] -; CHECK-NEXT: ret +; X86: phi i1 [ %{{[^,]*}}, %entry ] +; X86-NEXT: ret } ; Same as above, but the two blocks are in inverse order. @@ -68,19 +68,19 @@ ; CHECK-LABEL: @opeq1_inverse( ; The second block with zero-offset GEPs is kept, loads are removed. ; CHECK: land.rhs.i -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load +; X86: getelementptr {{.*}} i32 0 +; X86-NOT: load +; X86: getelementptr {{.*}} i32 0 +; X86-NOT: load ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. -; CHECK: @memcmp({{.*}}8) -; CHECK: icmp eq {{.*}} 0 +; X86: @memcmp({{.*}}8) +; X86: icmp eq {{.*}} 0 ; The branch is now a direct branch; the other block has been removed. -; CHECK: br label %opeq1.exit -; CHECK-NOT: br +; X86: br label %opeq1.exit +; X86-NOT: br ; The phi is updated. -; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ] -; CHECK-NEXT: ret +; X86: phi i1 [ %{{[^,]*}}, %land.rhs.i ] +; X86-NEXT: ret } Index: test/Transforms/MergeICmps/X86/tuple-four-int8.ll =================================================================== --- test/Transforms/MergeICmps/X86/tuple-four-int8.ll +++ test/Transforms/MergeICmps/X86/tuple-four-int8.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergeicmps -S -o - %s | FileCheck %s +; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s ; This is a more involved test: clang generates this weird pattern for ; tuple. 
Right now we skip the entry block Index: test/Transforms/MergeICmps/X86/volatile.ll =================================================================== --- test/Transforms/MergeICmps/X86/volatile.ll +++ test/Transforms/MergeICmps/X86/volatile.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergeicmps -S -o - %s | FileCheck %s +; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s %"struct.std::pair" = type { i32, i32 } Index: test/Transforms/MergeICmps/pair-int32-int32.ll =================================================================== --- test/Transforms/MergeICmps/pair-int32-int32.ll +++ test/Transforms/MergeICmps/pair-int32-int32.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergeicmps -S -o - %s | FileCheck %s +; RUN: opt -mergeicmps -S -o - %s | FileCheck %s --check-prefix=NOEXPANSION %"struct.std::pair" = type { i32, i32 } @@ -25,21 +25,7 @@ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 ; CHECK-LABEL: @opeq1( -; The entry block with zero-offset GEPs is kept, loads are removed. -; CHECK: entry -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. -; CHECK: @memcmp({{.*}}8) -; CHECK: icmp eq {{.*}} 0 -; The branch is now a direct branch; the other block has been removed. -; CHECK: br label %opeq1.exit -; CHECK-NOT: br -; The phi is updated. -; CHECK: phi i1 [ %{{[^,]*}}, %entry ] -; CHECK-NEXT: ret +; NOEXPANSION-NOT: @memcmp({{.*}}8) } ; Same as above, but the two blocks are in inverse order. @@ -66,21 +52,7 @@ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 ; CHECK-LABEL: @opeq1_inverse( -; The second block with zero-offset GEPs is kept, loads are removed. -; CHECK: land.rhs.i -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; CHECK: getelementptr {{.*}} i32 0 -; CHECK-NOT: load -; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. 
-; CHECK: @memcmp({{.*}}8) -; CHECK: icmp eq {{.*}} 0 -; The branch is now a direct branch; the other block has been removed. -; CHECK: br label %opeq1.exit -; CHECK-NOT: br -; The phi is updated. -; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ] -; CHECK-NEXT: ret +; NOEXPANSION-NOT: @memcmp({{.*}}8) }