Index: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -503,8 +503,11 @@
 
   /// If it is an interesting memory access, return the PointerOperand
   /// and set IsWrite/Alignment. Otherwise return nullptr.
+  /// MaybeMask is an output parameter for the mask Value, if we're looking at a
+  /// masked load/store.
   Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
-                                   uint64_t *TypeSize, unsigned *Alignment);
+                                   uint64_t *TypeSize, unsigned *Alignment,
+                                   Value **MaybeMask = nullptr);
   void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
                      bool UseCalls, const DataLayout &DL);
   void instrumentPointerComparisonOrSubtraction(Instruction *I);
@@ -998,12 +1001,11 @@
   return IsInteresting;
 }
 
-/// If I is an interesting memory access, return the PointerOperand
-/// and set IsWrite/Alignment. Otherwise return nullptr.
 Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
                                                    bool *IsWrite,
                                                    uint64_t *TypeSize,
-                                                   unsigned *Alignment) {
+                                                   unsigned *Alignment,
+                                                   Value **MaybeMask) {
   // Skip memory accesses inserted by another instrumentation.
   if (I->getMetadata("nosanitize")) return nullptr;
 
@@ -1037,6 +1039,33 @@
     *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
     *Alignment = 0;
     PtrOperand = XCHG->getPointerOperand();
+  } else if (auto CI = dyn_cast<CallInst>(I)) {
+    auto *F = dyn_cast<Function>(CI->getCalledValue());
+    if (F && (F->getName().startswith("llvm.masked.load.") ||
+              F->getName().startswith("llvm.masked.store."))) {
+      unsigned OpOffset = 0;
+      if (F->getName().startswith("llvm.masked.store.")) {
+        // Masked store has an initial operand for the value.
+        OpOffset = 1;
+        *IsWrite = true;
+      } else {
+        *IsWrite = false;
+      }
+      // Only instrument if the mask is constant for now.
+      if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
+        auto BasePtr = CI->getOperand(0 + OpOffset);
+        auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+        *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+        if (auto AlignmentConstant =
+                dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+          *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+        else
+          *Alignment = 1; // No alignment guarantees. We probably got Undef
+        if (MaybeMask)
+          *MaybeMask = CI->getOperand(2 + OpOffset);
+        PtrOperand = BasePtr;
+      }
+    }
   }
 
   // Do not instrument acesses from different address spaces; we cannot deal
@@ -1095,13 +1124,55 @@
   IRB.CreateCall(F, Param);
 }
 
+static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
+                                Value *Addr, unsigned Alignment,
+                                unsigned Granularity, uint32_t TypeSize,
+                                bool IsWrite, Value *SizeArgument,
+                                bool UseCalls, uint32_t Exp) {
+  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+  // if the data is properly aligned.
+  if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+       TypeSize == 128) &&
+      (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+    return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
+                                   UseCalls, Exp);
+  Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
+                                         UseCalls, Exp);
+}
+
+static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
+                                        const DataLayout &DL, Type *IntptrTy,
+                                        ConstantVector *Mask, Instruction *I,
+                                        Value *Addr, unsigned Alignment,
+                                        unsigned Granularity, uint32_t TypeSize,
+                                        bool IsWrite, Value *SizeArgument,
+                                        bool UseCalls, uint32_t Exp) {
+  auto *VTy = cast<PointerType>(Addr->getType())->getElementType();
+  uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+  unsigned Num = VTy->getVectorNumElements();
+  auto Zero = ConstantInt::get(IntptrTy, 0);
+  for (unsigned Idx = 0; Idx < Num; ++Idx) {
+    // dyn_cast as we might get UndefValue
+    auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
+    if (Masked && Masked->isAllOnesValue()) {
+      IRBuilder<> IRB(I);
+      auto InstrumentedAddress =
+          IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+      doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
+                          ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+    }
+  }
+}
+
 void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
                                      Instruction *I, bool UseCalls,
                                      const DataLayout &DL) {
   bool IsWrite = false;
   unsigned Alignment = 0;
   uint64_t TypeSize = 0;
-  Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment);
+  Value *MaybeMask = nullptr;
+  Value *Addr =
+      isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
   assert(Addr);
 
   // Optimization experiments.
@@ -1143,15 +1214,15 @@
     NumInstrumentedReads++;
 
   unsigned Granularity = 1 << Mapping.Scale;
-  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
-  // if the data is properly aligned.
-  if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
-       TypeSize == 128) &&
-      (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
-    return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls,
-                             Exp);
-  instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
-                                   UseCalls, Exp);
+  if (MaybeMask) {
+    auto Mask = cast<ConstantVector>(MaybeMask);
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
+                                Granularity, TypeSize, IsWrite, nullptr,
+                                UseCalls, Exp);
+  } else {
+    doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+                        IsWrite, nullptr, UseCalls, Exp);
+  }
 }
 
 Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
Index: llvm/trunk/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
===================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
@@ -0,0 +1,127 @@
+; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s
+; Support ASan instrumentation for constant-mask llvm.masked.{load,store}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@v4f32 = global <4 x float>* zeroinitializer, align 8
+@v8i32 = global <8 x i32>* zeroinitializer, align 8
+@v4i64 = global <4 x i32*>* zeroinitializer, align 8
+
+;;;;;;;;;;;;;;;; STORE
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*>, <4 x i32*>*, i32, <4 x i1>) argmemonly nounwind
+
+define void @store.v4f32.1110(<4 x float> %arg) sanitize_address {
+; CHECK-LABEL: @store.v4f32.1110
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; CHECK: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; CHECK: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP0]])
+; CHECK: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
+; CHECK: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP1]])
+; CHECK: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
+; CHECK: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP2]])
+; CHECK: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+  ret void
+}
+
+define void @store.v8i32.10010110(<8 x i32> %arg) sanitize_address {
+; CHECK-LABEL: @store.v8i32.10010110
+  %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+; CHECK: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
+; CHECK: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP0]])
+; CHECK: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 3
+; CHECK: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP3]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP3]])
+; CHECK: [[GEP5:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 5
+; CHECK: [[PGEP5:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP5]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP5]])
+; CHECK: [[GEP6:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 6
+; CHECK: [[PGEP6:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP6]] to i64
+; CHECK: call void @__asan_store4(i64 [[PGEP6]])
+; CHECK: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+  tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+  ret void
+}
+
+define void @store.v4i64.0001(<4 x i32*> %arg) sanitize_address {
+; CHECK-LABEL: @store.v4i64.0001
+  %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+; CHECK: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
+; CHECK: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; CHECK: call void @__asan_store8(i64 [[PGEP3]])
+; CHECK: tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+  tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+  ret void
+}
+
+define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) sanitize_address {
+; CHECK-LABEL: @store.v4f32.variable
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; CHECK-NOT: call void @__asan_store
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> %mask)
+  ret void
+}
+
+;;;;;;;;;;;;;;;; LOAD
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) argmemonly nounwind
+declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) argmemonly nounwind
+declare <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>*, i32, <4 x i1>, <4 x i32*>) argmemonly nounwind
+
+define <8 x i32> @load.v8i32.11100001(<8 x i32> %arg) sanitize_address {
+; CHECK-LABEL: @load.v8i32.11100001
+  %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+; CHECK: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
+; CHECK: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP0]])
+; CHECK: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 1
+; CHECK: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP1]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP1]])
+; CHECK: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 2
+; CHECK: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP2]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP2]])
+; CHECK: [[GEP7:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 7
+; CHECK: [[PGEP7:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP7]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP7]])
+; CHECK: tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+  %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+  ret <8 x i32> %res
+}
+
+define <4 x float> @load.v4f32.1001(<4 x float> %arg) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; CHECK: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; CHECK: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP0]])
+; CHECK: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
+; CHECK: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; CHECK: call void @__asan_load4(i64 [[PGEP3]])
+; CHECK: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+  ret <4 x float> %res
+}
+
+define <4 x i32*> @load.v4i64.0001(<4 x i32*> %arg) sanitize_address {
+; CHECK-LABEL: @load.v4i64.0001
+  %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+; CHECK: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
+; CHECK: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; CHECK: call void @__asan_load8(i64 [[PGEP3]])
+; CHECK: tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
+  %res = tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
+  ret <4 x i32*> %res
+}
+
+define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) sanitize_address {
+; CHECK-LABEL: @load.v4f32.variable
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; CHECK-NOT: call void @__asan_load
+  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
+  ret <4 x float> %res
+}