Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -6681,9 +6681,11 @@
 ::

       declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
-                                              i32 <len>, i32 <align>, i1 <isvolatile>)
+                                              i32 <len>, i32 <align>,
+                                              i1 <isdestvolatile>, i1 <issrcvolatile>)
       declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
-                                              i64 <len>, i32 <align>, i1 <isvolatile>)
+                                              i64 <len>, i32 <align>,
+                                              i1 <isdestvolatile>, i1 <issrcvolatile>)

 Overview:
 """""""""
@@ -6692,8 +6694,9 @@
 source location to the destination location.

 Note that, unlike the standard libc function, the ``llvm.memcpy.*``
-intrinsics do not return a value, takes extra alignment/isvolatile
-arguments and the pointers can be in specified address spaces.
+intrinsics do not return a value, take extra
+alignment/isdestvolatile/issrcvolatile arguments, and the pointers can be
+in specified address spaces.

 Arguments:
 """"""""""
@@ -6708,9 +6711,10 @@
 then the caller guarantees that both the source and destination
 pointers are aligned to that boundary.

-If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is
-a :ref:`volatile operation <volatile>`. The detailed access behavior is not
-very cleanly specified and it is unwise to depend on it.
+If the ``isdestvolatile`` parameter is ``true``, the '``llvm.memcpy.*``' call
+will perform :ref:`volatile accesses <volatile>` to the destination location.
+If the ``issrcvolatile`` parameter is ``true``, the '``llvm.memcpy.*``' call
+will perform :ref:`volatile accesses <volatile>` to the source location.

 Semantics:
 """"""""""
@@ -6721,6 +6725,13 @@
 to be aligned to some boundary, this can be specified as the fourth
 argument, otherwise it should be set to 0 or 1.

+In the case of volatile accesses, it is guaranteed that each byte loaded is
+loaded exactly once when ``issrcvolatile`` is ``true`` and that each byte
+stored is stored exactly once when ``isdestvolatile`` is ``true``, but these
+operations are free to be batched into 4-byte, 8-byte, or any other width of
+loads and stores. There is no guarantee about the interleaving of these loads
+and stores even when both flags are ``true``.
+
 '``llvm.memmove``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -6734,9 +6745,11 @@
 ::

       declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
-                                               i32 <len>, i32 <align>, i1 <isvolatile>)
+                                               i32 <len>, i32 <align>,
+                                               i1 <isdestvolatile>, i1 <issrcvolatile>)
       declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
-                                               i64 <len>, i32 <align>, i1 <isvolatile>)
+                                               i64 <len>, i32 <align>,
+                                               i1 <isdestvolatile>, i1 <issrcvolatile>)

 Overview:
 """""""""
@@ -6747,8 +6760,9 @@
 overlap.

 Note that, unlike the standard libc function, the ``llvm.memmove.*``
-intrinsics do not return a value, takes extra alignment/isvolatile
-arguments and the pointers can be in specified address spaces.
+intrinsics do not return a value, take extra
+alignment/isdestvolatile/issrcvolatile arguments, and the pointers can be
+in specified address spaces.

 Arguments:
 """"""""""
@@ -6763,9 +6777,10 @@
 then the caller guarantees that the source and destination pointers are
 aligned to that boundary.

-If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call
-is a :ref:`volatile operation <volatile>`. The detailed access behavior is
-not very cleanly specified and it is unwise to depend on it.
+If the ``isdestvolatile`` parameter is ``true``, the '``llvm.memmove.*``' call
+will perform :ref:`volatile accesses <volatile>` to the destination location.
+If the ``issrcvolatile`` parameter is ``true``, the '``llvm.memmove.*``' call
+will perform :ref:`volatile accesses <volatile>` to the source location.

 Semantics:
 """"""""""
@@ -6776,6 +6791,13 @@
 aligned to some boundary, this can be specified as the fourth argument,
 otherwise it should be set to 0 or 1.

+In the case of volatile accesses, it is guaranteed that each byte loaded is
+loaded exactly once when ``issrcvolatile`` is ``true`` and that each byte
+stored is stored exactly once when ``isdestvolatile`` is ``true``, but these
+operations are free to be batched into 4-byte, 8-byte, or any other width of
+loads and stores. There is no guarantee about the interleaving of these loads
+and stores even when both flags are ``true``.
+
 '``llvm.memset.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -6789,9 +6811,9 @@
 ::

       declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
-                                         i32 <len>, i32 <align>, i1 <isvolatile>)
+                                         i32 <len>, i32 <align>, i1 <isdestvolatile>)
       declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
-                                         i64 <len>, i32 <align>, i1 <isvolatile>)
+                                         i64 <len>, i32 <align>, i1 <isdestvolatile>)

 Overview:
 """""""""
@@ -6815,9 +6837,8 @@
 then the caller guarantees that the destination pointer is aligned to
 that boundary.

-If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is
-a :ref:`volatile operation <volatile>`. The detailed access behavior is not
-very cleanly specified and it is unwise to depend on it.
+If the ``isdestvolatile`` parameter is ``true``, the ``llvm.memset`` call is
+a :ref:`volatile operation <volatile>`.

 Semantics:
 """"""""""
@@ -6827,6 +6848,10 @@
 some boundary, this can be specified as the fourth argument, otherwise
 it should be set to 0 or 1.

+In the case of volatile accesses, it is guaranteed that each byte stored is
+stored exactly once, and these stores are free to be batched into 4-byte,
+8-byte, or any other width of stores.
+
 '``llvm.sqrt.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

Index: include/llvm/IR/IntrinsicInst.h
===================================================================
--- include/llvm/IR/IntrinsicInst.h
+++ include/llvm/IR/IntrinsicInst.h
@@ -128,11 +128,19 @@
     return getAlignmentCst()->getZExtValue();
   }

-  ConstantInt *getVolatileCst() const {
-    return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)));
+  bool isDestVolatile() const {
+    return !cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)))->isZero();
   }
+
   bool isVolatile() const {
-    return !getVolatileCst()->isZero();
+    switch (getIntrinsicID()) {
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+      return isDestVolatile() ||
+             !cast<ConstantInt>(const_cast<Value*>(getArgOperand(5)))->isZero();
+    default:
+      return isDestVolatile();
+    }
   }

   unsigned getDestAddressSpace() const {
@@ -162,10 +170,18 @@
     setArgOperand(3, A);
   }

-  void setVolatile(Constant* V) {
+  void setDestVolatile(Constant* V) {
     setArgOperand(4, V);
   }

+  void setVolatile(Constant* V) {
+    setDestVolatile(V);
+    if (getIntrinsicID() == Intrinsic::memcpy ||
+        getIntrinsicID() == Intrinsic::memmove) {
+      setArgOperand(5, V);
+    }
+  }
+
   Type *getAlignmentType() const {
     return getArgOperand(3)->getType();
   }
@@ -211,6 +227,7 @@
 /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
 ///
 class MemTransferInst : public MemIntrinsic {
+
 public:
   /// get* - Return the arguments to the instruction.
   ///
@@ -231,6 +248,14 @@
     setArgOperand(1, Ptr);
   }

+  bool isSourceVolatile() const {
+    return !cast<ConstantInt>(const_cast<Value*>(getArgOperand(5)))->isZero();
+  }
+
+  void setSourceVolatile(Constant* V) {
+    setArgOperand(5, V);
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const IntrinsicInst *I) {
     return I->getIntrinsicID() == Intrinsic::memcpy ||
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -252,11 +252,11 @@

 def int_memcpy  : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
-                             llvm_i32_ty, llvm_i1_ty],
+                             llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
                             [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
 def int_memmove : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
-                             llvm_i32_ty, llvm_i1_ty],
+                             llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
                             [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
 def int_memset  : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4469,7 +4469,8 @@
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     if (!Align)
       Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
-    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    const MemCpyInst &mcpy = cast<MemCpyInst>(I);
+    bool isVol = mcpy.isSourceVolatile() || mcpy.isDestVolatile();
     DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
                               MachinePointerInfo(I.getArgOperand(0)),
                               MachinePointerInfo(I.getArgOperand(1))));
@@ -4487,7 +4488,7 @@
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     if (!Align)
       Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
-    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    bool isVol = cast<MemSetInst>(I).isDestVolatile();
     DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                               MachinePointerInfo(I.getArgOperand(0))));
     return 0;
@@ -4506,7 +4507,8 @@
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     if (!Align)
       Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
-    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    const MemMoveInst &mm = cast<MemMoveInst>(I);
+    bool isVol = mm.isSourceVolatile() || mm.isDestVolatile();
     DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1))));
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -88,6 +88,23 @@
     }
     break;
   }
+  case 'm': {
+    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
+      F->setName(Name + ".old");
+      const FunctionType *FT = F->getFunctionType();
+      Type *Tys[] = { FT->getParamType(0), FT->getParamType(1), FT->getParamType(2) };
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, Tys);
+      return true;
+    }
+    if (Name.startswith("memmove.") && F->arg_size() == 5) {
+      F->setName(Name + ".old");
+      const FunctionType *FT = F->getFunctionType();
+      Type *Tys[] = { FT->getParamType(0), FT->getParamType(1), FT->getParamType(2) };
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, Tys);
+      return true;
+    }
+    break;
+  }
   case 'x': {
     if (Name.startswith("x86.sse2.pcmpeq.") ||
         Name.startswith("x86.sse2.pcmpgt.") ||
@@ -302,7 +319,8 @@
   }

   std::string Name = CI->getName().str();
-  CI->setName(Name + ".old");
+  if (!CI->getType()->isVoidTy())
+    CI->setName(Name + ".old");

   switch (NewFn->getIntrinsicID()) {
   default:
@@ -317,6 +335,35 @@
     CI->eraseFromParent();
     return;

+  case Intrinsic::memcpy:
+  case Intrinsic::memmove:
+    if (CI->getNumArgOperands() == 5) {
+      Value *Args[] = {
+        CI->getArgOperand(0),
+        CI->getArgOperand(1),
+        CI->getArgOperand(2),
+        CI->getArgOperand(3),
+        CI->getArgOperand(4), // isDestVolatile
+        CI->getArgOperand(4)  // isSrcVolatile
+      };
+
+      CallInst *NCI = Builder.CreateCall(NewFn, Args, Name);
+
+      NCI->setTailCall(CI->isTailCall());
+
+      if (CI->hasMetadata()) {
+        SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+        CI->getAllMetadata(MDs);
+        for (SmallVectorImpl<std::pair<unsigned, MDNode*> >::iterator
+               MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
+          NCI->setMetadata(MI->first, MI->second);
+      }
+
+      CI->replaceAllUsesWith(NCI);
+      CI->eraseFromParent();
+    }
+    return;
+
   case Intrinsic::arm_neon_vclz: {
     // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
     CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
Index: lib/IR/IRBuilder.cpp
===================================================================
--- lib/IR/IRBuilder.cpp
+++ lib/IR/IRBuilder.cpp
@@ -84,7 +84,8 @@
   Dst = getCastedInt8PtrValue(Dst);
   Src = getCastedInt8PtrValue(Src);

-  Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+  Value *Ops[] = { Dst, Src, Size, getInt32(Align),
+                   getInt1(isVolatile), getInt1(isVolatile) };
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
   Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
@@ -108,7 +109,8 @@
   Dst = getCastedInt8PtrValue(Dst);
   Src = getCastedInt8PtrValue(Src);

-  Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+  Value *Ops[] = { Dst, Src, Size, getInt32(Align),
+                   getInt1(isVolatile), getInt1(isVolatile) };
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
   Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -2108,8 +2108,12 @@
             "alignment argument of memory intrinsics must be a constant int",
             &CI);
     Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
-            "isvolatile argument of memory intrinsics must be a constant int",
+            "isdestvolatile argument of memory intrinsics must be a constant int",
             &CI);
+    if (ID != Intrinsic::memset)
+      Assert1(isa<ConstantInt>(CI.getArgOperand(5)),
+              "issrcvolatile argument of memory intrinsics must be a constant int",
+              &CI);
     break;
   case Intrinsic::gcroot:
   case Intrinsic::gcwrite:
Index: lib/Target/ARM/ARMFastISel.cpp
===================================================================
--- lib/Target/ARM/ARMFastISel.cpp
+++ lib/Target/ARM/ARMFastISel.cpp
@@ -2340,9 +2340,12 @@
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
     // If we're lowering a memory intrinsic instead of a regular call, skip the
-    // last two arguments, which shouldn't be passed to the underlying function.
-    if (IntrMemName && e-i <= 2)
-      break;
+    // last arguments, which shouldn't be passed to the underlying function.
+    if (IntrMemName) {
+      const unsigned numVolatileSpec = isa<MemTransferInst>(I) ? 2 : 1;
+      if (e - i <= 1 + numVolatileSpec)
+        break;
+    }

     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
Index: lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- lib/Transforms/Utils/InlineFunction.cpp
+++ lib/Transforms/Utils/InlineFunction.cpp
@@ -402,7 +402,8 @@

   Value *CallArgs[] = {
     DestCast, SrcCast, Size,
     ConstantInt::get(Type::getInt32Ty(Context), 1),
-    ConstantInt::getFalse(Context) // isVolatile
+    ConstantInt::getFalse(Context), // isDestVolatile
+    ConstantInt::getFalse(Context)  // isSrcVolatile
   };
   IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
Index: test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
===================================================================
--- test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -6,7 +6,7 @@
 ; it has a TBAA tag which declares that it is unrelated.

 ; CHECK: @foo
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false, i1 false), !tbaa !0
 ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !2
 ; CHECK-NEXT: ret void
 define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind {
Index: test/Assembler/auto_upgrade_intrinsics.ll
===================================================================
--- test/Assembler/auto_upgrade_intrinsics.ll
+++ test/Assembler/auto_upgrade_intrinsics.ll
@@ -42,3 +42,29 @@

   ret void
 }
+
+define void @test.memcpy(i8* %dst, i8* %src, i64 %len) {
+; CHECK: @test.memcpy
+
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1, i1 false)
+  ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1, i1 false, i1 false)
+
+  ret void
+}
+
+define void @test.memmove(i8* %dst, i8* %src, i64 %len) {
+; CHECK: @test.memmove
+
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1, i1 false)
+  ; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1, i1 false, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32, i1)
+; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1, i1)
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32, i1)
+; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1, i1)
Index: test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
===================================================================
--- test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -45,7 +45,7 @@
 ; CHECK: @write32to36
 entry:
   %0 = bitcast %struct.vec2plusi* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
   %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
   store i32 1, i32* %c, align 4
@@ -56,7 +56,7 @@
 ; CHECK: @write16to32
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
   %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
   store <4 x i32> , <4 x i32>* %c, align 4
@@ -67,7 +67,7 @@
 ; CHECK: @dontwrite28to32memcpy
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
   %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
   store i32 1, i32* %arrayidx1, align 4
@@ -87,7 +87,7 @@
   %add.ptr = getelementptr inbounds %struct.trapframe* %0, i64 -1
   %1 = bitcast %struct.trapframe* %add.ptr to i8*
   %2 = bitcast %struct.trapframe* %md_regs to i8*
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false, i1 false)
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
   %tf_trapno = getelementptr inbounds %struct.trapframe* %0, i64 -1, i32 1
   store i64 3, i64* %tf_trapno, align 8
Index: test/Transforms/InstCombine/objsize.ll
===================================================================
--- test/Transforms/InstCombine/objsize.ll
+++ test/Transforms/InstCombine/objsize.ll
@@ -129,7 +129,7 @@
   %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false, i1 false)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
   ret i8* %0
 }
Index: test/Transforms/InstCombine/sprintf-1.ll
===================================================================
--- test/Transforms/InstCombine/sprintf-1.ll
+++ test/Transforms/InstCombine/sprintf-1.ll
@@ -22,7 +22,7 @@
 ; CHECK: @test_simplify1
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false, i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -66,7 +66,7 @@
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
 ; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
 ; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false, i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
Index: test/Transforms/LoopIdiom/basic.ll
===================================================================
--- test/Transforms/LoopIdiom/basic.ll
+++ test/Transforms/LoopIdiom/basic.ll
@@ -159,7 +159,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK: @test6
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
Index: test/Transforms/MemCpyOpt/align.ll
===================================================================
--- test/Transforms/MemCpyOpt/align.ll
+++ test/Transforms/MemCpyOpt/align.ll
@@ -26,7 +26,7 @@
 define void @bar() {
 ; CHECK: @bar
 ; CHECK: %a4 = alloca i32, align 8
-; CHECK-NOT: memcpy
+; CHECK-NOT: call void @llvm.memcpy
   %a4 = alloca i32, align 4
   %a8 = alloca i32, align 8
   %a8.cast = bitcast i32* %a8 to i8*
Index: test/Transforms/MemCpyOpt/smaller.ll
===================================================================
--- test/Transforms/MemCpyOpt/smaller.ll
+++ test/Transforms/MemCpyOpt/smaller.ll
@@ -4,7 +4,7 @@
 ; Memcpyopt shouldn't optimize the second memcpy using the first
 ; because the first has a smaller size.

-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false, i1 false)

 target datalayout = "e-p:32:32:32"
Index: test/Verifier/2006-12-12-IntrinsicDefine.ll
===================================================================
--- test/Verifier/2006-12-12-IntrinsicDefine.ll
+++ test/Verifier/2006-12-12-IntrinsicDefine.ll
@@ -2,7 +2,7 @@
 ; CHECK: llvm intrinsics cannot be defined
 ; PR1047

-define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) {
+define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1, i1) {
 entry:
   ret void
 }
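
Example (illustrative only, not part of the patch): with the split flags, a copy out of a
buffer whose reads must stay volatile can leave an ordinary-memory destination
non-volatile. The function and operand names below are made up for the example; the
six-operand signature is the one this patch declares and documents.

    declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
                                            i64, i32, i1, i1)

    define void @drain_device(i8* %dst, i8* %src) {
    entry:
      ; isdestvolatile = false, issrcvolatile = true: each source byte is loaded
      ; exactly once, though the loads may still be batched into wider accesses.
      call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 4,
                                           i1 false, i1 true)
      ret void
    }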
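
Example (illustrative only): existing assembly and bitcode that still use the old
five-operand form keep working; the AutoUpgrade.cpp hunk above rewrites such calls by
passing the single isvolatile bit as both new flags, which is what the
test/Assembler/auto_upgrade_intrinsics.ll checks expect. The operand names here are
placeholders.

    ; old form, as written in existing IR
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1, i1 true)

    ; form produced by the auto-upgrade
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 1,
                                         i1 true, i1 true)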