Index: docs/LangRef.html
===================================================================
--- docs/LangRef.html
+++ docs/LangRef.html
@@ -330,6 +330,8 @@
   'llvm.objectsize' Intrinsic
+  'llvm.invariant' Intrinsic
+
   'llvm.expect' Intrinsic
   'llvm.donothing' Intrinsic
@@ -9057,6 +9059,34 @@
 
+'llvm.invariant' Intrinsic
+
+Syntax:
+
+  declare void @llvm.invariant(i1 %cond)
+
+Overview:
+
+The 'llvm.invariant' intrinsic allows the optimizer to assume that the
+provided condition is true. This information can then be used to simplify
+other parts of the code.
+
+Arguments:
+
+The argument is the condition that the optimizer may assume is always
+true.
+
+Semantics:
+
+The optimizer may assume that the provided condition is always true. No
+code is generated for this intrinsic, and instructions that contribute
+only to computing the provided condition are likewise not used for code
+generation.
+

    'llvm.donothing' Intrinsic

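To make the semantics concrete, here is a minimal IR sketch (function and
value names are illustrative; the pattern mirrors the tests added below) in
which the invariant encodes the assumption that %a is 32-byte aligned, which
the new alignment propagation pass can then use to raise the alignment on
the load:

  define i32 @f(i32* %a) {
  entry:
    ; Assert that the low five bits of the pointer value are zero, i.e.,
    ; that %a is 32-byte aligned.
    %ptrint = ptrtoint i32* %a to i64
    %maskedptr = and i64 %ptrint, 31
    %maskcond = icmp eq i64 %maskedptr, 0
    tail call void @llvm.invariant(i1 %maskcond)
    ; Given the invariant, the alignment here can be raised from 4 to 32.
    %v = load i32* %a, align 4
    ret i32 %v
  }

  declare void @llvm.invariant(i1) nounwind readnone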
Index: include/llvm-c/Transforms/Scalar.h
===================================================================
--- include/llvm-c/Transforms/Scalar.h
+++ include/llvm-c/Transforms/Scalar.h
@@ -35,6 +35,9 @@
 /** See llvm::createAggressiveDCEPass function. */
 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
 
+/** See llvm::createAlignmentInvPropPass function. */
+void LLVMAddAlignmentInvPropPass(LLVMPassManagerRef PM);
+
 /** See llvm::createCFGSimplificationPass function. */
 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
 
Index: include/llvm/Analysis/EphemeralValues.h
===================================================================
--- /dev/null
+++ include/llvm/Analysis/EphemeralValues.h
@@ -0,0 +1,56 @@
+//===- EphemeralValues.h - Ephemeral value analysis -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Calculate ephemeral values - those used only (indirectly) by invariants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EPHEMERAL_VALUES_H
+#define LLVM_ANALYSIS_EPHEMERAL_VALUES_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class Value;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// @brief Analysis that finds ephemeral values.
+class EphemeralValues : public ModulePass {
+  DenseSet<Value *> EphValues;
+
+  EphemeralValues(const EphemeralValues &) LLVM_DELETED_FUNCTION;
+  const EphemeralValues &operator=(const EphemeralValues &) LLVM_DELETED_FUNCTION;
+
+public:
+  // Returns true if the provided value is ephemeral.
+  bool isEphemeralValue(Value *V) const {
+    return EphValues.count(V);
+  }
+  bool isEphemeralValue(const Value *V) const {
+    return isEphemeralValue(const_cast<Value *>(V));
+  }
+
+public:
+  static char ID;
+  explicit EphemeralValues();
+
+  /// @name ModulePass interface
+  //@{
+  virtual bool runOnModule(Module &M);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual void print(raw_ostream &OS, const Module *M) const;
+  //@}
+};
+
+} // End llvm namespace
+#endif
+
Index: include/llvm/Analysis/InlineCost.h
===================================================================
--- include/llvm/Analysis/InlineCost.h
+++ include/llvm/Analysis/InlineCost.h
@@ -27,6 +27,7 @@
 
   class CallSite;
   class DataLayout;
+  class EphemeralValues;
 
   namespace InlineConstants {
     // Various magic constants used to adjust heuristics.
@@ -106,11 +107,14 @@
   class InlineCostAnalyzer {
     // DataLayout if available, or null.
     const DataLayout *TD;
+    // EphemeralValues if available, or null.
+    const EphemeralValues *EV;
 
   public:
-    InlineCostAnalyzer(): TD(0) {}
+    InlineCostAnalyzer(): TD(0), EV(0) {}
 
     void setDataLayout(const DataLayout *TData) { TD = TData; }
+    void setEphemeralValues(const EphemeralValues *EVals) { EV = EVals; }
 
     /// \brief Get an InlineCost object representing the cost of inlining this
     /// callsite.
Index: include/llvm/Analysis/Passes.h
===================================================================
--- include/llvm/Analysis/Passes.h
+++ include/llvm/Analysis/Passes.h
@@ -66,6 +66,12 @@
   //===--------------------------------------------------------------------===//
   //
+  // createEphemeralValuesPass - This pass identifies ephemeral values.
+  //
+  ModulePass *createEphemeralValuesPass();
+
+  //===--------------------------------------------------------------------===//
+  //
   /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows
   /// about the semantics of a set of libcalls specified by LCI. The newly
   /// constructed pass takes ownership of the pointer that is provided.
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -66,6 +66,7 @@
 void initializeAliasSetPrinterPass(PassRegistry&);
 void initializeAlwaysInlinerPass(PassRegistry&);
 void initializeArgPromotionPass(PassRegistry&);
+void initializeAlignmentInvPropPass(PassRegistry&);
 void initializeBarrierNoopPass(PassRegistry&);
 void initializeBasicAliasAnalysisPass(PassRegistry&);
 void initializeBasicCallGraphPass(PassRegistry&);
@@ -107,6 +108,7 @@
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEdgeProfilerPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
+void initializeEphemeralValuesPass(PassRegistry&);
 void initializePathProfilerPass(PassRegistry&);
 void initializeGCOVProfilerPass(PassRegistry&);
 void initializeAddressSanitizerPass(PassRegistry&);
Index: include/llvm/Intrinsics.td
===================================================================
--- include/llvm/Intrinsics.td
+++ include/llvm/Intrinsics.td
@@ -234,6 +234,8 @@
 def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
 
+def int_invariant : Intrinsic<[], [llvm_i1_ty], [IntrNoMem]>;
+
 // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
 // guard to the correct place on the stack frame.
 def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -50,6 +50,7 @@
       (void) llvm::createAliasAnalysisCounterPass();
       (void) llvm::createAliasDebugger();
       (void) llvm::createArgumentPromotionPass();
+      (void) llvm::createAlignmentInvPropPass();
       (void) llvm::createBasicAliasAnalysisPass();
       (void) llvm::createLibCallAliasAnalysisPass(0);
       (void) llvm::createScalarEvolutionAliasAnalysisPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -32,6 +32,12 @@
 //===----------------------------------------------------------------------===//
 //
+// AlignmentInvProp - A worklist-driven alignment assumption propagation pass.
+//
+FunctionPass *createAlignmentInvPropPass();
+
+//===----------------------------------------------------------------------===//
+//
 // SCCP - Sparse conditional constant propagation.
 //
 FunctionPass *createSCCPPass();
Index: lib/Analysis/Analysis.cpp
===================================================================
--- lib/Analysis/Analysis.cpp
+++ lib/Analysis/Analysis.cpp
@@ -42,6 +42,7 @@
   initializePostDomPrinterPass(Registry);
   initializePostDomOnlyViewerPass(Registry);
   initializePostDomOnlyPrinterPass(Registry);
+  initializeEphemeralValuesPass(Registry);
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
Index: lib/Analysis/CMakeLists.txt
===================================================================
--- lib/Analysis/CMakeLists.txt
+++ lib/Analysis/CMakeLists.txt
@@ -17,6 +17,7 @@
   DependenceAnalysis.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
+  EphemeralValues.cpp
   IVUsers.cpp
   InlineCost.cpp
   InstCount.cpp
Index: lib/Analysis/CodeMetrics.cpp
===================================================================
--- lib/Analysis/CodeMetrics.cpp
+++ lib/Analysis/CodeMetrics.cpp
@@ -69,6 +69,7 @@
         return false;
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
+      case Intrinsic::invariant:
       case Intrinsic::invariant_start:
       case Intrinsic::invariant_end:
       case Intrinsic::lifetime_start:
Index: lib/Analysis/EphemeralValues.cpp
===================================================================
--- /dev/null
+++ lib/Analysis/EphemeralValues.cpp
@@ -0,0 +1,114 @@
+//===------------------------ EphemeralValues.cpp ------------------------===//
+// Code to determine ephemeral values
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ephemeral value determination.
+//
+//===----------------------------------------------------------------------===//
+
+#define EV_NAME "eph-values"
+#define DEBUG_TYPE EV_NAME
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/EphemeralValues.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Instruction.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char EphemeralValues::ID = 0;
+static const char ev_name[] = "Ephemeral value analysis";
+INITIALIZE_PASS_BEGIN(EphemeralValues, EV_NAME,
+                ev_name, false, false)
+INITIALIZE_PASS_END(EphemeralValues, EV_NAME,
+                ev_name, false, false)
+
+ModulePass *llvm::createEphemeralValuesPass() {
+  return new EphemeralValues();
+}
+
+EphemeralValues::EphemeralValues() : ModulePass(ID) {
+  initializeEphemeralValuesPass(*PassRegistry::getPassRegistry());
+}
+
+void EphemeralValues::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+void EphemeralValues::print(raw_ostream &OS, const Module *M) const {
+  if (!M)
+    return;
+
+  OS << "Ephemeral values...\n";
+  for (Module::const_iterator L = M->begin(), LE = M->end(); L != LE; ++L)
+    for (Function::const_iterator I = L->begin(), IE = L->end(); I != IE; ++I)
+      for (BasicBlock::const_iterator J = I->getFirstInsertionPt(),
+           JE = I->end(); J != JE; ++J)
+        if (isEphemeralValue(J)) {
+          OS << "\tephemeral: " << L->getName() << ": " << I->getName() <<
+                ": " << *J << "\n";
+        }
+}
+
+bool EphemeralValues::runOnModule(Module &M) {
+  DenseSet<Value *> Visited;
+  SmallVector<Value *, 16> WorkSet;
+
+  EphValues.clear();
+
+  for (Module::iterator L = M.begin(), LE = M.end(); L != LE; ++L)
+    for (Function::iterator I = L->begin(), IE = L->end(); I != IE; ++I)
+      for (BasicBlock::iterator J = I->getFirstInsertionPt(), JE = I->end();
+           J != JE; ++J)
+        if (CallInst *CI = dyn_cast<CallInst>(J))
+          if (Function *F2 = CI->getCalledFunction())
+            if (F2->getIntrinsicID() == Intrinsic::invariant) {
+              WorkSet.push_back(CI);
+              EphValues.insert(CI);
+            }
+
+  while (!WorkSet.empty()) {
+    Value *V = WorkSet.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    // If all uses of this value are ephemeral, then so is this value.
+    bool FoundNEUse = false;
+    for (Value::use_iterator I = V->use_begin(), IE = V->use_end();
+         I != IE; ++I)
+      if (!EphValues.count(*I)) {
+        FoundNEUse = true;
+        break;
+      }
+
+    if (!FoundNEUse) {
+      EphValues.insert(V);
+
+      if (User *U = dyn_cast<User>(V))
+        for (User::op_iterator J = U->op_begin(), JE = U->op_end();
+             J != JE; ++J)
+          WorkSet.push_back(*J);
+    }
+  }
+
+  return false;
+}
+
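To make "ephemeral" concrete, consider this hypothetical IR fragment (names
are illustrative). The icmp and the call are ephemeral because the
condition's only transitive use is the invariant itself; %x is not
ephemeral, because it is also returned and so has a use outside the
invariant:

  %x = add i32 %a, %b
  %c = icmp sgt i32 %x, 0                 ; ephemeral: only feeds the call
  tail call void @llvm.invariant(i1 %c)   ; ephemeral: the invariant itself
  ret i32 %x                              ; real use: %x stays non-ephemeral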
Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline-cost"
+#include "llvm/Analysis/EphemeralValues.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -43,6 +44,8 @@
   // DataLayout if available, or null.
   const DataLayout *const TD;
+  // EphemeralValues if available, or null.
+  const EphemeralValues *const EV;
 
   // The called function.
   Function &F;
@@ -125,8 +128,9 @@
   bool visitCallSite(CallSite CS);
 
 public:
-  CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
-    : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+  CallAnalyzer(const DataLayout *TD, const EphemeralValues *EV,
+               Function &Callee, int Threshold)
+    : TD(TD), EV(EV), F(Callee), Threshold(Threshold), Cost(0),
       IsCallerRecursive(false), IsRecursiveCall(false),
       ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
       NumInstructions(0), NumVectorInstructions(0),
@@ -670,7 +674,7 @@
   // during devirtualization and so we want to give it a hefty bonus for
   // inlining, but cap that bonus in the event that inlining wouldn't pan
   // out. Pretend to inline the function, with a custom threshold.
-  CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+  CallAnalyzer CA(TD, EV, *F, InlineConstants::IndirectCallThreshold);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // bonus we want to apply, but don't go below zero.
@@ -714,7 +718,7 @@
   // all of the per-instruction logic. The visit tree returns true if we
   // consumed the instruction in any way, and false if the instruction's base
   // cost should count against inlining.
-  if (Base::visit(I))
+  if ((EV && EV->isEphemeralValue(I)) || Base::visit(I))
     ++NumInstructionsSimplified;
   else
     Cost += InlineConstants::InstrCost;
@@ -1058,7 +1062,7 @@
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
         << "...\n");
 
-  CallAnalyzer CA(TD, *Callee, Threshold);
+  CallAnalyzer CA(TD, EV, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);
 
   DEBUG(CA.dump());
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -1898,6 +1898,7 @@
       // should be considered at least *safe* to speculate...
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
+      case Intrinsic::invariant:
         return true;
 
       case Intrinsic::bswap:
Index: lib/CodeGen/IntrinsicLowering.cpp
===================================================================
--- lib/CodeGen/IntrinsicLowering.cpp
+++ lib/CodeGen/IntrinsicLowering.cpp
@@ -453,6 +453,7 @@
     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
     break;
 
+  case Intrinsic::invariant:
   case Intrinsic::var_annotation:
     break;   // Strip out annotate intrinsic
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5018,6 +5018,7 @@
     setValue(&I, Res);
     return 0;
   }
+  case Intrinsic::invariant:
   case Intrinsic::var_annotation:
     // Discard annotate attributes
     return 0;
Index: lib/Transforms/IPO/InlineSimple.cpp
===================================================================
--- lib/Transforms/IPO/InlineSimple.cpp
+++ lib/Transforms/IPO/InlineSimple.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "inline"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/EphemeralValues.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DataLayout.h"
@@ -42,6 +43,10 @@
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, getInlineThreshold(CS));
     }
+    virtual void getAnalysisUsage(AnalysisUsage &Info) const {
+      Info.addRequired<EphemeralValues>();
+      Inliner::getAnalysisUsage(Info);
+    }
     using llvm::Pass::doInitialization;
     virtual bool doInitialization(CallGraph &CG);
   };
@@ -51,6 +56,7 @@
 INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(EphemeralValues)
 INITIALIZE_PASS_END(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 
@@ -64,6 +70,12 @@
 // annotated with the noinline attribute.
 bool SimpleInliner::doInitialization(CallGraph &CG) {
   CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
+
+  // FIXME: We need to use getAnalysisIfAvailable instead of getAnalysis
+  // because, even though the pass has been required, it will not have been
+  // run. getAnalysisIfAvailable will run the pass now, while getAnalysis
+  // will not (and will assert instead).
+  CA.setEphemeralValues(getAnalysisIfAvailable<EphemeralValues>());
   return false;
 }
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -195,6 +195,8 @@
     MPM.add(createLoopUnrollPass());          // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
 
+  MPM.add(createAlignmentInvPropPass());      // Alignment invariants
+
   if (OptLevel > 1)
     MPM.add(createGVNPass());                 // Remove redundancies
   MPM.add(createMemCpyOptPass());             // Remove memcpy / form memset
Index: lib/Transforms/Scalar/ADCE.cpp
===================================================================
--- lib/Transforms/Scalar/ADCE.cpp
+++ lib/Transforms/Scalar/ADCE.cpp
@@ -23,6 +23,7 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstIterator.h"
@@ -49,6 +50,13 @@
 char ADCE::ID = 0;
 INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination",
                 false, false)
 
+static bool isInvariantIntrinsic(Instruction *I) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    return II->getIntrinsicID() == Intrinsic::invariant;
+
+  return false;
+}
+
 bool ADCE::runOnFunction(Function& F) {
   SmallPtrSet<Instruction*, 128> alive;
   SmallVector<Instruction*, 128> worklist;
 
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (isa<TerminatorInst>(I.getInstructionIterator()) ||
         isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+        isInvariantIntrinsic(I.getInstructionIterator()) ||
         isa<LandingPadInst>(I.getInstructionIterator()) ||
         I->mayHaveSideEffects()) {
       alive.insert(I.getInstructionIterator());
Index: lib/Transforms/Scalar/AlignmentInvProp.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/AlignmentInvProp.cpp
@@ -0,0 +1,386 @@
+//===------------------------ AlignmentInvProp.cpp ------------------------===//
+// Code to perform Alignment Invariant Propagation
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements alignment invariant propagation.
+//
+//===----------------------------------------------------------------------===//
+
+#define AA_NAME "alignment-inv-prop"
+#define DEBUG_TYPE AA_NAME
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+using namespace llvm;
+
+STATISTIC(NumLoadAlignChanged,
+  "Number of loads changed by alignment assumptions");
+STATISTIC(NumStoreAlignChanged,
+  "Number of stores changed by alignment assumptions");
+STATISTIC(NumMemIntAlignChanged,
+  "Number of memory intrinsics changed by alignment assumptions");
+
+namespace {
+  struct AlignmentInvProp : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    AlignmentInvProp() : FunctionPass(ID) {
+      initializeAlignmentInvPropPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<ScalarEvolution>();
+    }
+  };
+}
+
+char AlignmentInvProp::ID = 0;
+static const char aip_name[] = "Alignment invariant propagation";
+INITIALIZE_PASS_BEGIN(AlignmentInvProp, AA_NAME,
+                aip_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(AlignmentInvProp, AA_NAME,
+                aip_name, false, false)
+
+FunctionPass *llvm::createAlignmentInvPropPass() {
+  return new AlignmentInvProp();
+}
+
+static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
+                                    const SCEV *AlignSCEV,
+                                    ScalarEvolution *SE) {
+  // DiffUnits = Diff % int64_t(Alignment)
+  const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
+  const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
+  const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
+
+  DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
+    *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
+
+  if (const SCEVConstant *ConstDUSCEV =
+        dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
+    int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
+
+    if (!DiffUnits)
+      return (unsigned)
+        cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();
+
+    uint64_t DiffUnitsAbs = abs64(DiffUnits);
+    if (isPowerOf2_64(DiffUnitsAbs))
+      return (unsigned) DiffUnitsAbs;
+  }
+
+  return 0;
+}
+
+static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
+                                const SCEV *OffSCEV, Value *Ptr,
+                                ScalarEvolution *SE) {
+  const SCEV *PtrSCEV = SE->getSCEV(Ptr);
+  const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
+
+  // What we really want to know is the overall offset to the aligned
+  // address. This address is displaced by the provided offset.
+  DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
+
+  DEBUG(dbgs() << AA_NAME ": alignment of " << *Ptr << " relative to " <<
+    *AlignSCEV << " and offset " << *OffSCEV <<
+    " using diff " << *DiffSCEV << "\n");
+
+  unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
+  DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");
+
+  if (NewAlignment) {
+    return NewAlignment;
+  } else if (const SCEVAddRecExpr *DiffARSCEV =
+               dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
+    // The relative offset to the alignment assumption did not yield a
+    // constant, but we should try harder: if we assume that a is 32-byte
+    // aligned, then in 'for (i = 0; i < 1024; i += 4) r += a[i];' not all
+    // of the loads from a are 32-byte aligned; instead they alternate
+    // between 32 and 16-byte alignment. As a result, the new alignment will
+    // not be a constant, but can still be improved over the default (of 4)
+    // to 16.
+
+    const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
+    const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
+
+    DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
+      *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
+
+    NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
+    unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
+
+    DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
+    DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");
+
+    if (NewAlignment > NewIncAlignment) {
+      if (NewAlignment % NewIncAlignment == 0) {
+        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+          NewIncAlignment << "\n");
+        return NewIncAlignment;
+      }
+    } else if (NewIncAlignment > NewAlignment) {
+      if (NewIncAlignment % NewAlignment == 0) {
+        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+          NewAlignment << "\n");
+        return NewAlignment;
+      }
+    } else if (NewIncAlignment == NewAlignment && NewIncAlignment) {
+      DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+        NewAlignment << "\n");
+      return NewAlignment;
+    }
+  }
+
+  return 0;
+}
+
+bool AlignmentInvProp::runOnFunction(Function &F) {
+  SmallVector<Value *, 16> InvConds;
+  BasicBlock *EntryBB = F.begin();
+  for (df_iterator<BasicBlock *> I = df_begin(EntryBB), IE = df_end(EntryBB);
+       I != IE; ++I)
+    for (BasicBlock::iterator J = I->getFirstInsertionPt(), JE = I->end();
+         J != JE; ++J)
+      if (CallInst *CI = dyn_cast<CallInst>(J))
+        if (Function *F2 = CI->getCalledFunction())
+          if (F2->getIntrinsicID() == Intrinsic::invariant)
+            InvConds.push_back(CI->getArgOperand(0));
+
+  // Visit all invariant conditions, and split those that are ands of
+  // other conditions. Each condition is visited only once, so duplicate
+  // invariants cannot keep this loop from terminating.
+  DenseSet<Value *> VisitedInvCond;
+  for (unsigned Idx = 0; Idx != InvConds.size(); ++Idx) {
+    Value *V = InvConds[Idx];
+    if (!VisitedInvCond.insert(V).second)
+      continue;
+
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+      if (BO->getOpcode() == Instruction::And) {
+        InvConds.push_back(BO->getOperand(0));
+        InvConds.push_back(BO->getOperand(1));
+      }
+  }
+
+  bool Changed = false;
+  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+
+  DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
+
+  for (SmallVector<Value *, 16>::iterator I = InvConds.begin(),
+       IE = InvConds.end(); I != IE; ++I) {
+    // An alignment invariant must be a statement about the least-significant
+    // bits of the pointer being zero, possibly with some offset.
+    ICmpInst *ICI = dyn_cast<ICmpInst>(*I);
+    if (!ICI)
+      continue;
+
+    // This must be an expression of the form: x & m == 0.
+    if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
+      continue;
+
+    Value *CmpLHS = ICI->getOperand(0);
+    Value *CmpRHS = ICI->getOperand(1);
+    const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
+    const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
+    if (CmpLHSSCEV->isZero())
+      std::swap(CmpLHS, CmpRHS);
+    else if (!CmpRHSSCEV->isZero())
+      continue;
+
+    BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
+    if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
+      continue;
+
+    Value *AndLHS = CmpBO->getOperand(0);
+    Value *AndRHS = CmpBO->getOperand(1);
+    const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
+    const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
+    if (isa<SCEVConstant>(AndLHSSCEV)) {
+      std::swap(AndLHS, AndRHS);
+      std::swap(AndLHSSCEV, AndRHSSCEV);
+    }
+
+    const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
+    if (!MaskSCEV)
+      continue;
+
+    unsigned TrailingOnes =
+      MaskSCEV->getValue()->getValue().countTrailingOnes();
+    if (!TrailingOnes)
+      continue;
+
+    uint64_t Alignment;
+    TrailingOnes = std::min(TrailingOnes,
+      unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+    Alignment = std::min(1u << TrailingOnes,
+                         Value::MaximumAlignment);
+
+    Type *Int64Ty = Type::getInt64Ty(F.getContext());
+    const SCEV *AlignSCEV = SE->getConstant(Int64Ty, Alignment);
+
+    // The LHS might be a ptrtoint instruction, or it might be the pointer
+    // with an offset.
+    Value *AAPtr = 0;
+    const SCEV *OffSCEV = 0;
+    if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
+      AAPtr = PToI->getPointerOperand();
+      OffSCEV = SE->getConstant(Int64Ty, 0);
+    } else if (const SCEVAddExpr* AndLHSAddSCEV =
+                 dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
+      // Try to find the ptrtoint; subtract it and the rest is the offset.
+      for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
+           JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
+        if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
+          if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
+            AAPtr = PToI->getPointerOperand();
+            OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
+            break;
+          }
+    }
+
+    if (!AAPtr)
+      continue;
+
+    unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
+    if (OffSCEVBits < 64)
+      OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
+    else if (OffSCEVBits > 64)
+      continue;
+
+    AAPtr = AAPtr->stripPointerCasts();
+    const SCEV *AASCEV = SE->getSCEV(AAPtr);
+
+    // Apply the assumption to all other users of the specified pointer.
+    DenseSet<Instruction *> Visited;
+    SmallVector<Instruction *, 16> WorkList;
+    for (Value::use_iterator J = AAPtr->use_begin(),
+         JE = AAPtr->use_end(); J != JE; ++J) {
+      if (*J == *I)
+        continue;
+
+      if (Instruction *K = dyn_cast<Instruction>(*J))
+        WorkList.push_back(K);
+    }
+
+    while (!WorkList.empty()) {
+      Instruction *J = WorkList.pop_back_val();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
+        unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          LI->getPointerOperand(), SE);
+
+        if (NewAlignment > LI->getAlignment()) {
+          LI->setAlignment(NewAlignment);
+          ++NumLoadAlignChanged;
+          Changed = true;
+        }
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
+        unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          SI->getPointerOperand(), SE);
+
+        if (NewAlignment > SI->getAlignment()) {
+          SI->setAlignment(NewAlignment);
+          ++NumStoreAlignChanged;
+          Changed = true;
+        }
+      } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
+        unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          MI->getDest(), SE);
+
+        // For memory transfers, we need a common alignment for both the
+        // source and destination. If we have a new alignment for this
+        // instruction, but only for one operand, save it. If we reach the
+        // other operand through another assumption later, then we may
+        // change the alignment at that point.
+        if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+          unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV,
+            OffSCEV, MTI->getSource(), SE);
+
+          DenseMap<MemTransferInst *, unsigned>::iterator DI =
+            NewDestAlignments.find(MTI);
+          unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
+            0 : DI->second;
+
+          DenseMap<MemTransferInst *, unsigned>::iterator SI =
+            NewSrcAlignments.find(MTI);
+          unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
+            0 : SI->second;
+
+          DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
+            AltDestAlignment << " " << NewSrcAlignment <<
+            " " << AltSrcAlignment << "\n");
+
+          // Of these four alignments, pick the largest possible...
+          unsigned NewAlignment = 0;
+          if (NewDestAlignment <= NewSrcAlignment ||
+              NewDestAlignment <= AltSrcAlignment)
+            NewAlignment = std::max(NewAlignment, NewDestAlignment);
+          if (AltDestAlignment <= NewSrcAlignment ||
+              AltDestAlignment <= AltSrcAlignment)
+            NewAlignment = std::max(NewAlignment, AltDestAlignment);
+          if (NewSrcAlignment <= NewDestAlignment ||
+              NewSrcAlignment <= AltDestAlignment)
+            NewAlignment = std::max(NewAlignment, NewSrcAlignment);
+          if (AltSrcAlignment <= NewDestAlignment ||
+              AltSrcAlignment <= AltDestAlignment)
+            NewAlignment = std::max(NewAlignment, AltSrcAlignment);
+
+          if (NewAlignment > MI->getAlignment()) {
+            MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+              MI->getParent()->getContext()), NewAlignment));
+            ++NumMemIntAlignChanged;
+            Changed = true;
+          }
+
+          NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
+          NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
+        } else if (NewDestAlignment > MI->getAlignment()) {
+          MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+            MI->getParent()->getContext()), NewDestAlignment));
+          ++NumMemIntAlignChanged;
+          Changed = true;
+        }
+      }
+
+      Visited.insert(J);
+      for (Value::use_iterator UJ = J->use_begin(), UE = J->use_end();
+           UJ != UE; ++UJ) {
+        Instruction *K = cast<Instruction>(*UJ);
+        if (!Visited.count(K))
+          WorkList.push_back(K);
+      }
+    }
+  }
+
+  return Changed;
+}
+
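A quick worked example of the start/increment reasoning above, using the
loop tests added below: in @hoo, %a is asserted to be 32-byte aligned and
the load offsets follow the recurrence {0,+,32} (the induction variable
advances by eight i32 elements, or 32 bytes), so both the start and the
increment are 0 modulo 32 and the load alignment can be raised to 32. In
@joo, the induction variable starts at 4, so the offsets follow {16,+,32};
the start yields alignment 16 and the increment 32, and because 32 % 16 == 0
the pass settles on 16. In @koo, the increment is only 16 bytes ({0,+,16}),
which likewise limits the result to 16.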
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMScalarOpts
   ADCE.cpp
+  AlignmentInvProp.cpp
   BasicBlockPlacement.cpp
   CodeGenPrepare.cpp
   ConstantProp.cpp
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -28,6 +28,7 @@
 /// ScalarOpts library.
 void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeADCEPass(Registry);
+  initializeAlignmentInvPropPass(Registry);
   initializeBlockPlacementPass(Registry);
   initializeCodeGenPreparePass(Registry);
   initializeConstantPropagationPass(Registry);
@@ -76,6 +77,10 @@
   unwrap(PM)->add(createAggressiveDCEPass());
 }
 
+void LLVMAddAlignmentInvPropPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createAlignmentInvPropPass());
+}
+
 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createCFGSimplificationPass());
 }
Index: lib/Transforms/Utils/Local.cpp
===================================================================
--- lib/Transforms/Utils/Local.cpp
+++ lib/Transforms/Utils/Local.cpp
@@ -286,6 +286,12 @@
     return true;
   }
 
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    // Invariants are dead if their condition is undef.
+    if (II->getIntrinsicID() == Intrinsic::invariant)
+      return isa<UndefValue>(II->getArgOperand(0));
+  }
+
   if (!I->mayHaveSideEffects()) return true;
 
   // Special case intrinsics that "may have side effects" but can be deleted
Index: test/Analysis/EphemeralValues/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Analysis/EphemeralValues/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
Index: test/Analysis/EphemeralValues/simple.ll
===================================================================
--- /dev/null
+++ test/Analysis/EphemeralValues/simple.ll
@@ -0,0 +1,98 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -analyze -eph-values | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: ephemeral: foo: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: foo: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: foo: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: foo: entry: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: ephemeral: foo2: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: foo2: entry: %offsetptr = add i64 %ptrint, 24
+; CHECK: ephemeral: foo2: entry: %maskedptr = and i64 %offsetptr, 31
+; CHECK: ephemeral: foo2: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: foo2: entry: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: ephemeral: hoo: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: hoo: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: hoo: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: hoo: for.end: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.invariant(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: ephemeral: moo2: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: moo2: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: moo2: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: moo2: entry: tail call void @llvm.invariant(i1 %maskcond)
+; CHECK: ephemeral: moo2: entry: %ptrint1 = ptrtoint i32* %b to i64
+; CHECK: ephemeral: moo2: entry: %maskedptr3 = and i64 %ptrint1, 127
+; CHECK: ephemeral: moo2: entry: %maskcond4 = icmp eq i64 %maskedptr3, 0
+; CHECK: ephemeral: moo2: entry: tail call void @llvm.invariant(i1 %maskcond4)
+}
+
+declare void @llvm.invariant(i1) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+
Index: test/Transforms/AlignmentInvProp/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/AlignmentInvProp/lit.local.cfg
@@ -0,0 +1,2 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
Index: test/Transforms/AlignmentInvProp/simple.ll
===================================================================
--- /dev/null
+++ test/Transforms/AlignmentInvProp/simple.ll
@@ -0,0 +1,219 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -alignment-inv-prop -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.invariant(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
}
+
+declare void @llvm.invariant(i1) nounwind readnone
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+
Index: test/Transforms/Inline/ephemeral.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/ephemeral.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %c = icmp eq i32 %x1, 0
+
+  %a2 = mul i32 %a1, %a1
+  %a3 = sub i32 %a1, 5
+  %a4 = udiv i32 %a3, -13
+  %a5 = mul i32 %a4, %a4
+  %a6 = add i32 %a5, %x1
+  %ca = icmp sgt i32 %a6, -7
+  tail call void @llvm.invariant(i1 %ca)
+
+  ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+  %r = call i1 @inner()
+  ret i1 %r
+}
+
+declare void @llvm.invariant(i1) nounwind readnone