Index: llvm/trunk/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/trunk/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/trunk/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,11 +19,15 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
 using namespace llvm;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "aggressive-instcombine"
 
@@ -53,6 +57,91 @@
 };
 } // namespace
 
+/// This is a recursive helper for 'and X, 1' that walks through a chain of 'or'
+/// instructions looking for shift ops of a common source value (first member of
+/// the pair). The second member of the pair is a mask constant for all of the
+/// bits that are being compared. So this:
+/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
+/// returns {X, 0x129} and those are the operands of an 'and' that is compared
+/// to zero.
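+/// (Mask bits 0, 3, 5, and 8: 1 + 8 + 32 + 256 = 0x129.) The 'or' tree may
+/// take any shape; left- and right-nested chains reduce to the same result.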
+static bool matchMaskedCmpOp(Value *V, std::pair<Value *, APInt> &Result) {
+  // Recurse through a chain of 'or' operands.
+  Value *Op0, *Op1;
+  if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
+    return matchMaskedCmpOp(Op0, Result) && matchMaskedCmpOp(Op1, Result);
+
+  // We need a shift-right or a bare value representing a compare of bit 0 of
+  // the original source operand.
+  Value *Candidate;
+  uint64_t BitIndex = 0;
+  if (!match(V, m_LShr(m_Value(Candidate), m_ConstantInt(BitIndex))))
+    Candidate = V;
+
+  // Initialize result source operand.
+  if (!Result.first)
+    Result.first = Candidate;
+
+  // Fill in the mask bit derived from the shift constant. An out-of-range
+  // shift may survive in unsimplified IR; bail out rather than assert.
+  if (BitIndex >= Result.second.getBitWidth())
+    return false;
+  Result.second.setBit(BitIndex);
+  return Result.first == Candidate;
+}
+
+/// Match an 'and' of a chain of or-shifted bits from a common source value into
+/// a masked compare:
+/// and (or (lshr X, C), ...), 1 --> zext ((X & C') != 0)
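+/// For example: and (or (lshr X, 3), X), 1 --> zext ((X & 9) != 0)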
+static bool foldToMaskedCmp(Instruction &I) {
+  // TODO: This is only looking for 'any-bits-set' and 'all-bits-clear'.
+  // We should also match 'all-bits-set' and 'any-bits-clear' by looking for
+  // a chain of 'and'.
+  if (!match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
+    return false;
+
+  std::pair<Value *, APInt> MaskOps(
+      nullptr, APInt::getNullValue(I.getType()->getScalarSizeInBits()));
+  if (!matchMaskedCmpOp(I.getOperand(0), MaskOps))
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Value *Mask = Builder.CreateAnd(MaskOps.first, MaskOps.second);
+  Value *CmpZero = Builder.CreateIsNotNull(Mask);
+  Value *Zext = Builder.CreateZExt(CmpZero, I.getType());
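+  // Replace uses but leave the dead 'and' in place; erasing it here would
+  // invalidate the caller's instruction iterator.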
+  I.replaceAllUsesWith(Zext);
+  return true;
+}
+
+/// This is the entry point for folds that could be implemented in regular
+/// InstCombine, but they are kept separate because they occur rarely and/or
+/// require more than a constant-length pattern match.
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+  bool MadeChange = false;
+  for (BasicBlock &BB : F) {
+    // Ignore unreachable basic blocks.
+    if (!DT.isReachableFromEntry(&BB))
+      continue;
+    // Folds in this loop must not delete instructions; that would invalidate
+    // the instruction iterator.
+    for (Instruction &I : BB)
+      MadeChange |= foldToMaskedCmp(I);
+  }
+
+  // We're done with transforms, so remove dead instructions.
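+  // (The or/lshr/and chains made dead by foldToMaskedCmp are erased here.)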
+  if (MadeChange)
+    for (BasicBlock &BB : F)
+      SimplifyInstructionsInBlock(&BB);
+
+  return MadeChange;
+}
+
+/// This is the entry point for all transforms. Pass manager differences are
+/// handled in the callers of this function.
+static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
+  bool MadeChange = false;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  TruncInstCombine TIC(TLI, DL, DT);
+  MadeChange |= TIC.run(F);
+  MadeChange |= foldUnusualPatterns(F, DT);
+  return MadeChange;
+}
+
 void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
   AU.setPreservesCFG();
@@ -65,35 +154,19 @@
 }
 
 bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  auto &DL = F.getParent()->getDataLayout();
-
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-
-  // TODO: add more patterns to handle...
-
-  return MadeIRChange;
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  return runImpl(F, TLI, DT);
 }
 
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
-  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-  auto &DL = F.getParent()->getDataLayout();
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-  if (!MadeIRChange)
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  if (!runImpl(F, TLI, DT)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
-
+  }
   // Mark all the analyses that instcombine updates as preserved.
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
Index: llvm/trunk/test/Transforms/AggressiveInstCombine/masked-cmp.ll
===================================================================
--- llvm/trunk/test/Transforms/AggressiveInstCombine/masked-cmp.ll
+++ llvm/trunk/test/Transforms/AggressiveInstCombine/masked-cmp.ll
@@ -5,10 +5,10 @@
 
 define i32 @anyset_two_bit_mask(i32 %x) {
 ; CHECK-LABEL: @anyset_two_bit_mask(
-; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[O:%.*]] = or i32 [[S]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %s = lshr i32 %x, 3
   %o = or i32 %s, %x
@@ -18,14 +18,10 @@
 
 define i32 @anyset_four_bit_mask(i32 %x) {
 ; CHECK-LABEL: @anyset_four_bit_mask(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O1:%.*]] = or i32 [[T1]], [[X]]
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[O1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 297
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %t1 = lshr i32 %x, 3
   %t2 = lshr i32 %x, 5
@@ -41,13 +37,10 @@
 
 define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
 ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[T1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %t1 = lshr i32 %x, 3
   %t2 = lshr i32 %x, 5
Index: llvm/trunk/test/Transforms/PhaseOrdering/bitfield-bittests.ll
===================================================================
--- llvm/trunk/test/Transforms/PhaseOrdering/bitfield-bittests.ll
+++ llvm/trunk/test/Transforms/PhaseOrdering/bitfield-bittests.ll
@@ -18,15 +18,10 @@
 
 define i32 @allclear(i32 %a) {
 ; CHECK-LABEL: @allclear(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[OR13]], 1
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %a.sroa.0.0.trunc = trunc i32 %a to i8
   %a.sroa.5.0.shift = lshr i32 %a, 8
@@ -51,14 +46,10 @@
 
 define i32 @anyset(i32 %a) {
 ; CHECK-LABEL: @anyset(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    ret i32 [[OR13]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %a.sroa.0.0.trunc = trunc i32 %a to i8
   %a.sroa.5.0.shift = lshr i32 %a, 8