Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -548,7 +548,11 @@
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
-  /// \brief Enable inline expansion of memcmp
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const;
+  /// \brief Enable inline expansion of memcmp.
+  ///
+  /// On a true return, \p MaxLoadSize holds the maximum load size the target
+  /// wants the expansion to use. Implementations that return false may leave
+  /// it unset, so callers must not read it in that case.
+  bool enableMemCmpExpansion(unsigned &MaxLoadSize) const;
 
   /// \brief Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
@@ -985,7 +989,7 @@
                                    unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0;
+  virtual bool enableMemCmpExpansion(unsigned &MaxLoadSize) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -1235,8 +1239,8 @@
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override {
-    return Impl.expandMemCmp(I, MaxLoadSize);
+  bool enableMemCmpExpansion(unsigned &MaxLoadSize) override {
+    return Impl.enableMemCmpExpansion(MaxLoadSize);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -290,7 +290,7 @@
 
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { return false; }
+  bool enableMemCmpExpansion(unsigned &MaxLoadSize) { return false; }
 
   bool enableInterleavedAccessVectorization() { return false; }
 
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -245,8 +245,8 @@
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }
 
-bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const {
-  return TTIImpl->expandMemCmp(I, MaxLoadSize);
+bool TargetTransformInfo::enableMemCmpExpansion(unsigned &MaxLoadSize) const {
+  return TTIImpl->enableMemCmpExpansion(MaxLoadSize);
 }
 
 bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -2315,7 +2315,7 @@
   // TTI call to check if target would like to expand memcmp. Also, get the
   // MaxLoadSize.
   unsigned MaxLoadSize;
-  if (!TTI->expandMemCmp(CI, MaxLoadSize))
+  if (!TTI->enableMemCmpExpansion(MaxLoadSize))
     return false;
 
   // Early exit from expansion if -Oz.
Index: lib/Target/PowerPC/PPCTargetTransformInfo.h
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -61,7 +61,7 @@
   /// @{
 
   bool enableAggressiveInterleaving(bool LoopHasReductions);
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
+  bool enableMemCmpExpansion(unsigned &MaxLoadSize);
   bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector) const;
Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -215,7 +215,7 @@
   return LoopHasReductions;
 }
 
-bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+bool PPCTTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
   MaxLoadSize = 8;
   return true;
 }
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,7 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
+  bool enableMemCmpExpansion(unsigned &MaxLoadSize);
   bool enableInterleavedAccessVectorization();
 private:
   int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2536,7 +2536,7 @@
   return (CallerBits & CalleeBits) == CalleeBits;
 }
 
-bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
   // TODO: We can increase these based on available vector ops.
   MaxLoadSize = ST->is64Bit() ? 8 : 4;
   return true;
Index: lib/Transforms/Scalar/MergeICmps.cpp
===================================================================
--- lib/Transforms/Scalar/MergeICmps.cpp
+++ lib/Transforms/Scalar/MergeICmps.cpp
@@ -24,6 +24,8 @@
 
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -41,8 +43,6 @@
 
 #define DEBUG_TYPE "mergeicmps"
 
-#define MERGEICMPS_DOT_ON
-
 // A BCE atom.
 struct BCEAtom {
   BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
@@ -605,22 +605,32 @@
   bool runOnFunction(Function &F) override {
     if (skipFunction(F)) return false;
     const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-    auto PA = runImpl(F, &TLI);
+    const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    auto PA = runImpl(F, &TLI, &TTI);
     return !PA.areAllPreserved();
   }
 
  private:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 
-  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI);
+  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+                            const TargetTransformInfo *TTI);
 };
 
 PreservedAnalyses MergeICmps::runImpl(Function &F,
-                                      const TargetLibraryInfo *TLI) {
+                                      const TargetLibraryInfo *TLI,
+                                      const TargetTransformInfo *TTI) {
   DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
 
+  // We only try merging comparisons if the target wants to expand memcmp later.
+  // The rationale is to avoid turning small chains into memcmp calls.
+  // MaxLoadSize is only the hook's out-parameter; its value is not used here.
+  unsigned MaxLoadSize;
+  if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all();
+
   bool MadeChange = false;
 
   for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
@@ -640,6 +650,7 @@
 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
                       "Merge contiguous icmps into a memcmp", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
                     "Merge contiguous icmps into a memcmp", false, false)
 
Index: test/Transforms/MergeICmps/X86/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/MergeICmps/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+
Index: test/Transforms/MergeICmps/X86/pair-int32-int32.ll
===================================================================
--- test/Transforms/MergeICmps/X86/pair-int32-int32.ll
+++ test/Transforms/MergeICmps/X86/pair-int32-int32.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s --check-prefix=X86
 
 %"struct.std::pair" = type { i32, i32 }
 
@@ -26,20 +26,20 @@
   ret i1 %4
-; CHECK-LABEL: @opeq1(
+; X86-LABEL: @opeq1(
 ; The entry block with zero-offset GEPs is kept, loads are removed.
-; CHECK: entry
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
+; X86: entry
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
+; X86: @memcmp({{.*}}8)
+; X86: icmp eq {{.*}} 0
 ; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
+; X86: br label %opeq1.exit
+; X86-NOT: br
 ; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %entry ]
-; CHECK-NEXT: ret
+; X86: phi i1 [ %{{[^,]*}}, %entry ]
+; X86-NEXT: ret
 }
 
 ; Same as above, but the two blocks are in inverse order.
@@ -68,19 +68,19 @@
-; CHECK-LABEL: @opeq1_inverse(
+; X86-LABEL: @opeq1_inverse(
 ; The second block with zero-offset GEPs is kept, loads are removed.
-; CHECK: land.rhs.i
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
+; X86: land.rhs.i
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
+; X86: getelementptr {{.*}} i32 0
+; X86-NOT: load
 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
+; X86: @memcmp({{.*}}8)
+; X86: icmp eq {{.*}} 0
 ; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
+; X86: br label %opeq1.exit
+; X86-NOT: br
 ; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
-; CHECK-NEXT: ret
+; X86: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
+; X86-NEXT: ret
 }
 
 
Index: test/Transforms/MergeICmps/X86/tuple-four-int8.ll
===================================================================
--- test/Transforms/MergeICmps/X86/tuple-four-int8.ll
+++ test/Transforms/MergeICmps/X86/tuple-four-int8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s
 
 ; This is a more involved test: clang generates this weird pattern for
 ; tuple. Right now we skip the entry block
Index: test/Transforms/MergeICmps/X86/volatile.ll
===================================================================
--- test/Transforms/MergeICmps/X86/volatile.ll
+++ test/Transforms/MergeICmps/X86/volatile.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -mtriple=x86_64-unknown-unknown -S -o - %s | FileCheck %s
 
 %"struct.std::pair" = type { i32, i32 }
 
Index: test/Transforms/MergeICmps/pair-int32-int32.ll
===================================================================
--- test/Transforms/MergeICmps/pair-int32-int32.ll
+++ test/Transforms/MergeICmps/pair-int32-int32.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
+; RUN: opt -mergeicmps -S -o - %s | FileCheck %s --check-prefix=NOEXPANSION
 
 %"struct.std::pair" = type { i32, i32 }
 
@@ -25,21 +25,7 @@
   %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
   ret i1 %4
-; CHECK-LABEL: @opeq1(
-; The entry block with zero-offset GEPs is kept, loads are removed.
-; CHECK: entry
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
-; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
-; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %entry ]
-; CHECK-NEXT: ret
+; NOEXPANSION-LABEL: @opeq1(
+; NOEXPANSION-NOT: @memcmp({{.*}}8)
 }
 
 ; Same as above, but the two blocks are in inverse order.
@@ -66,21 +52,7 @@
   %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
   ret i1 %4
-; CHECK-LABEL: @opeq1_inverse(
-; The second block with zero-offset GEPs is kept, loads are removed.
-; CHECK: land.rhs.i
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; CHECK: getelementptr {{.*}} i32 0
-; CHECK-NOT: load
-; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
-; CHECK: @memcmp({{.*}}8)
-; CHECK: icmp eq {{.*}} 0
-; The branch is now a direct branch; the other block has been removed.
-; CHECK: br label %opeq1.exit
-; CHECK-NOT: br
-; The phi is updated.
-; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
-; CHECK-NEXT: ret
+; NOEXPANSION-LABEL: @opeq1_inverse(
+; NOEXPANSION-NOT: @memcmp({{.*}}8)
 }
 
 