Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -223,6 +223,9 @@ /// incurs significant execution cost. bool isLoweredToCall(const Function *F) const; + /// \brief Target control of generating Intrinsic::bitreverse in InstCombine. + bool shouldUseBitreverseIntrinsic() const; + /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the @@ -577,6 +580,7 @@ virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isLoweredToCall(const Function *F) = 0; + virtual bool shouldUseBitreverseIntrinsic() = 0; virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; virtual bool isLegalICmpImmediate(int64_t Imm) = 0; @@ -700,6 +704,9 @@ bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } + bool shouldUseBitreverseIntrinsic() override { + return Impl.shouldUseBitreverseIntrinsic(); + } void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override { return Impl.getUnrollingPreferences(L, UP); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -193,6 +193,10 @@ return true; } + bool shouldUseBitreverseIntrinsic() { + return true; + } + void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {} bool isLegalAddImmediate(int64_t Imm) { return false; } Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -216,6 
+216,10 @@ return BaseT::getOperationCost(Opcode, Ty, OpTy); } + bool shouldUseBitreverseIntrinsic() { + return true; + } + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -91,6 +91,10 @@ return TTIImpl->isLoweredToCall(F); } +bool TargetTransformInfo::shouldUseBitreverseIntrinsic() const { + return TTIImpl->shouldUseBitreverseIntrinsic(); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, UP); Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -60,6 +60,11 @@ using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); + bool shouldUseBitreverseIntrinsic() { + if (!ST->hasThumb2()) + return false; + return true; + } /// @} /// \name Vector TTI Implementations Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" @@ -1732,13 +1733,16 @@ // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. 
- bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; + bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); } + if (TTI) + OKForBitReverse &= TTI->shouldUseBitreverseIntrinsic(); + Intrinsic::ID Intrin; if (OKForBSwap) Intrin = Intrinsic::bswap; Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -36,6 +36,7 @@ class DataLayout; class DominatorTree; class TargetLibraryInfo; +class TargetTransformInfo; class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -184,6 +185,7 @@ // FIXME: These can never be null and should be references. AssumptionCache *AC; TargetLibraryInfo *TLI; + TargetTransformInfo *TTI; DominatorTree *DT; const DataLayout &DL; @@ -197,9 +199,11 @@ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, AliasAnalysis *AA, AssumptionCache *AC, TargetLibraryInfo *TLI, + TargetTransformInfo *TTI, DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} + AA(AA), AC(AC), TLI(TLI), TTI(TTI), DT(DT), DL(DL), LI(LI), + MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. 
/// Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -48,6 +48,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" @@ -3039,7 +3040,8 @@ static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, - TargetLibraryInfo &TLI, DominatorTree &DT, + TargetLibraryInfo &TLI, + TargetTransformInfo *TTI, DominatorTree &DT, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); @@ -3064,7 +3066,7 @@ Changed = true; InstCombiner IC(Worklist, &Builder, F.optForMinSize(), - AA, &AC, &TLI, &DT, DL, LI); + AA, &AC, &TLI, TTI, &DT, DL, LI); if (IC.run()) Changed = true; @@ -3084,7 +3086,8 @@ auto *LI = AM->getCachedResult<LoopAnalysis>(F); // FIXME: The AliasAnalysis is not yet supported in the new pass manager - if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI)) + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, + /*TTI=*/nullptr, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3120,6 +3123,7 @@ AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); @@ -3133,13 +3137,14 @@ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Optional analyses. auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ?
&LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, &TTI, DT, LI); } char InstructionCombiningPass::ID = 0; Index: test/Transforms/InstCombine/bit_reverse.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/bit_reverse.ll @@ -0,0 +1,56 @@ +;REQUIRES: arm-registered-target +;RUN: opt < %s -mtriple=armv5e--linux-gnueabi -instcombine -S | FileCheck %s +;RUN: opt < %s -mtriple=thumbv4t--linux-gnueabi -instcombine -S | FileCheck %s +;RUN: opt < %s -mtriple=armv6--linux-gnueabi -instcombine -S | FileCheck %s + +;RUN: opt < %s -mtriple=armv7--linux-gnueabi -instcombine -S | FileCheck %s --check-prefix=RBIT +;RUN: opt < %s -mtriple=thumbv8--linux-gnueabi -instcombine -S | FileCheck %s --check-prefix=RBIT + +;CHECK-NOT: llvm.bitreverse +;RBIT: llvm.bitreverse + +; Function Attrs: nounwind +define void @byte_reversal(i8* %p, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.0 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 1 + %shl = shl i32 %and, 7 + %and1 = and i32 %conv, 2 + %shl2 = shl i32 %and1, 5 + %or = or i32 %shl, %shl2 + %and3 = and i32 %conv, 4 + %shl4 = shl i32 %and3, 3 + %or5 = or i32 %or, %shl4 + %and6 = and i32 %conv, 8 + %shl7 = shl i32 %and6, 1 + %or8 = or i32 %or5, %shl7 + %and9 = and i32 %conv, 16 + %shr = lshr i32 %and9, 1 + %or10 = or i32 %or8, %shr + %and11 = and i32 %conv, 32 + %shr12 = lshr i32 %and11, 3 + %or13 = or i32 %or10, %shr12 + %and14 = and i32 %conv, 64 + %shr15 = lshr i32 %and14, 5 + %or16 = or i32 %or13, %shr15 + %and17 = and i32 %conv, 128 + %shr18 = lshr i32 %and17, 7 
+ %or19 = or i32 %or16, %shr18 + %conv20 = trunc i32 %or19 to i8 + store i8 %conv20, i8* %arrayidx, align 1 + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}