diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -392,6 +392,14 @@
   0x1C0000000000,  // OriginBase
 };
 
+// s390x Linux
+static const MemoryMapParams Linux_S390X_MemoryMapParams = {
+    0xC00000000000, // AndMask
+    0,              // XorMask (not used)
+    0x080000000000, // ShadowBase
+    0x1C0000000000, // OriginBase
+};
+
 // aarch64 Linux
 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
   0,               // AndMask (not used)
@@ -439,6 +447,11 @@
   &Linux_PowerPC64_MemoryMapParams,
 };
 
+static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
+    nullptr,
+    &Linux_S390X_MemoryMapParams,
+};
+
 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
   nullptr,
   &Linux_AArch64_MemoryMapParams,
@@ -484,6 +497,7 @@
   friend struct VarArgMIPS64Helper;
   friend struct VarArgAArch64Helper;
   friend struct VarArgPowerPC64Helper;
+  friend struct VarArgSystemZHelper;
 
   void initializeModule(Module &M);
   void initializeCallbacks(Module &M);
@@ -796,14 +810,25 @@
        AccessSizeIndex++) {
     unsigned AccessSize = 1 << AccessSizeIndex;
     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
+    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
+    MaybeWarningFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
+    MaybeWarningFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
-        FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
-        IRB.getInt32Ty());
+        FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
+        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
 
     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
+    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
+    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
+    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
-        FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
-        IRB.getInt8PtrTy(), IRB.getInt32Ty());
+        FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
+        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
+        IRB.getInt32Ty());
   }
 
   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
@@ -924,6 +949,9 @@
           case Triple::ppc64le:
             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
             break;
+          case Triple::systemz:
+            MapParams = Linux_S390_MemoryMapParams.bits64;
+            break;
           case Triple::aarch64:
           case Triple::aarch64_be:
             MapParams = Linux_ARM_MemoryMapParams.bits64;
@@ -4599,6 +4627,306 @@
   }
 };
 
+/// SystemZ-specific implementation of VarArgHelper.
+struct VarArgSystemZHelper : public VarArgHelper {
+  static const unsigned SystemZGpOffset = 16;
+  static const unsigned SystemZGpEndOffset = 56;
+  static const unsigned SystemZFpOffset = 128;
+  static const unsigned SystemZFpEndOffset = 160;
+  static const unsigned SystemZMaxVrArgs = 8;
+  static const unsigned SystemZRegSaveAreaSize = 160;
+  static const unsigned SystemZOverflowOffset = 160;
+  static const unsigned SystemZVAListTagSize = 32;
+  static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
+  static const unsigned SystemZRegSaveAreaPtrOffset = 24;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgTLSOriginCopy = nullptr;
+  Value *VAArgOverflowSize = nullptr;
+
+  SmallVector<CallInst *, 16> VAStartInstrumentationList;
+
+  enum class ArgKind { GeneralPurpose, FloatingPoint, Vector, Memory };
+
+  enum class ShadowExtension { None, Zero, Sign };
+
+  VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
+                      MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV) {}
+
+  ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
+    // T is a SystemZABIInfo::classifyArgumentType() output, and there are
+    // only a few possibilities of what it can be. In particular, enums, single
+    // element structs and large types have already been taken care of.
+    if (T->isFloatingPointTy())
+      return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
+    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+      return ArgKind::GeneralPurpose;
+    if (T->isPointerTy())
+      return ArgKind::GeneralPurpose;
+    if (T->isVectorTy())
+      return ArgKind::Vector;
+    return ArgKind::Memory;
+  }
+
+  ShadowExtension getShadowExtension(const CallSite &CS, unsigned ArgNo) {
+    // ABI says: "One of the simple integer types no more than 64 bits wide.
+    // ... If such an argument is shorter than 64 bits, replace it by a full
+    // 64-bit integer representing the same number, using sign or zero
+    // extension". Shadow for an integer argument has the same type as the
+    // argument itself, so it can be sign or zero extended as well.
+    bool ZExt = CS.paramHasAttr(ArgNo, Attribute::ZExt);
+    bool SExt = CS.paramHasAttr(ArgNo, Attribute::SExt);
+    if (ZExt) {
+      assert(!SExt);
+      return ShadowExtension::Zero;
+    }
+    if (SExt) {
+      assert(!ZExt);
+      return ShadowExtension::Sign;
+    }
+    return ShadowExtension::None;
+  }
+
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    bool IsSoftFloatABI = CS.getCalledFunction()
+                              ->getFnAttribute("use-soft-float")
+                              .getValueAsString() == "true";
+    unsigned GpOffset = SystemZGpOffset;
+    unsigned FpOffset = SystemZFpOffset;
+    unsigned VrIndex = 0;
+    unsigned OverflowOffset = SystemZOverflowOffset;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CS.getArgumentNo(ArgIt);
+      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+      // SystemZABIInfo does not produce ByVal parameters.
+      assert(!CS.paramHasAttr(ArgNo, Attribute::ByVal));
+      Type *T = A->getType();
+      // Some fp128 arguments are converted to pointers only in the back end.
+      if (T->isFP128Ty())
+        T = PointerType::get(T, 0);
+      ArgKind AK = classifyArgument(T, IsSoftFloatABI);
+      if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
+        AK = ArgKind::Memory;
+      if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
+        AK = ArgKind::Memory;
+      if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
+        AK = ArgKind::Memory;
+      Value *ShadowBase = nullptr;
+      Value *OriginBase = nullptr;
+      ShadowExtension SE = ShadowExtension::None;
+      switch (AK) {
+      case ArgKind::GeneralPurpose: {
+        // Always keep track of GpOffset, but store shadow only for varargs.
+        uint64_t ArgSize = 8;
+        if (GpOffset + ArgSize <= kParamTLSSize) {
+          if (!IsFixed) {
+            SE = getShadowExtension(CS, ArgNo);
+            uint64_t GapSize = 0;
+            if (SE == ShadowExtension::None) {
+              uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
+              assert(ArgAllocSize <= ArgSize);
+              GapSize = ArgSize - ArgAllocSize;
+            }
+            ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
+            if (MS.TrackOrigins)
+              OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
+          }
+          GpOffset += ArgSize;
+        } else {
+          GpOffset = kParamTLSSize;
+        }
+        break;
+      }
+      case ArgKind::FloatingPoint: {
+        // Always keep track of FpOffset, but store shadow only for varargs.
+        uint64_t ArgSize = 8;
+        if (FpOffset + ArgSize <= kParamTLSSize) {
+          if (!IsFixed) {
+            // PoP says: "A short floating-point datum requires only the
+            // left-most 32 bit positions of a floating-point register".
+            // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
+            // don't extend shadow and don't mind the gap.
+            ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
+            if (MS.TrackOrigins)
+              OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
+          }
+          FpOffset += ArgSize;
+        } else {
+          FpOffset = kParamTLSSize;
+        }
+        break;
+      }
+      case ArgKind::Vector: {
+        // Keep track of VrIndex. No need to store shadow, since vector varargs
+        // go through AK_Memory.
+        assert(IsFixed);
+        VrIndex++;
+        break;
+      }
+      case ArgKind::Memory: {
+        // Keep track of OverflowOffset and store shadow only for varargs.
+        // Ignore fixed args, since we need to copy only the vararg portion of
+        // the overflow area shadow.
+        if (!IsFixed) {
+          uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
+          uint64_t ArgSize = alignTo(ArgAllocSize, 8);
+          if (OverflowOffset + ArgSize <= kParamTLSSize) {
+            SE = getShadowExtension(CS, ArgNo);
+            uint64_t GapSize =
+                SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
+            ShadowBase =
+                getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
+            if (MS.TrackOrigins)
+              OriginBase =
+                  getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
+            OverflowOffset += ArgSize;
+          } else {
+            OverflowOffset = kParamTLSSize;
+          }
+        }
+        break;
+      }
+      }
+      if (ShadowBase == nullptr)
+        continue;
+      Value *Shadow = MSV.getShadow(A);
+      if (SE != ShadowExtension::None)
+        Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
+                                      /*Signed*/ SE == ShadowExtension::Sign);
+      ShadowBase = IRB.CreateIntToPtr(
+          ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
+      IRB.CreateStore(Shadow, ShadowBase);
+      if (MS.TrackOrigins) {
+        Value *Origin = MSV.getOrigin(A);
+        unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+        MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+                        kMinOriginAlignment);
+      }
+    }
+    Constant *OverflowSize = ConstantInt::get(
+        IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+  }
+
+  Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_va_o");
+  }
+
+  void unpoisonVAListTagForInst(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) =
+        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     SystemZVAListTagSize, Alignment, false);
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    VAStartInstrumentationList.push_back(&I);
+    unpoisonVAListTagForInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
+
+  void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
+    Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+    Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(
+            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+            ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
+        PointerType::get(RegSaveAreaPtrTy, 0));
+    Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+    Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+        MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ true);
+    // TODO(iii): copy only fragments filled by visitCallSite()
+    IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                     SystemZRegSaveAreaSize);
+    if (MS.TrackOrigins)
+      IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+                       Alignment, SystemZRegSaveAreaSize);
+  }
+
+  void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
+    Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(
+            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+            ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
+        PointerType::get(OverflowArgAreaPtrTy, 0));
+    Value *OverflowArgAreaPtr =
+        IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
+    Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
+        MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
+                               Alignment, /*isStore*/ true);
+    Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+                                           SystemZOverflowOffset);
+    IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
+                     VAArgOverflowSize);
+    if (MS.TrackOrigins) {
+      SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+                                      SystemZOverflowOffset);
+      IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+                       VAArgOverflowSize);
+    }
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+          IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
+                        VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+      if (MS.TrackOrigins) {
+        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
+                         Align(8), CopySize);
+      }
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
+         VaStartNo < VaStartNum; VaStartNo++) {
+      CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      copyRegSaveArea(IRB, VAListTag);
+      copyOverflowArea(IRB, VAListTag);
+    }
+  }
+};
+
 /// A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -4629,6 +4957,8 @@
   else if (TargetTriple.getArch() == Triple::ppc64 ||
            TargetTriple.getArch() == Triple::ppc64le)
     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == Triple::systemz)
+    return new VarArgSystemZHelper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -S -march=z13 -msan-kernel=1 -float-abi=soft -passes=msan 2>&1 | FileCheck %s
+; RUN: opt < %s -msan -S -march=z13 -msan-kernel=1 -float-abi=soft | FileCheck %s
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+declare i64 @foo(i64 %guard, ...) #0
+
+attributes #0 = { "target-features"="+soft-float" "use-soft-float"="true" }
+
+declare i32 @random_i32()
+declare i64 @random_i64()
+declare float @random_float()
+declare double @random_double()
+
+define i64 @bar() #1 {
+  %arg2 = call i32 () @random_i32()
+  %arg3 = call float () @random_float()
+  %arg4 = call i32 () @random_i32()
+  %arg5 = call double () @random_double()
+  %arg6 = call i64 () @random_i64()
+  %arg9 = call i32 () @random_i32()
+  %arg11 = call float () @random_float()
+  %arg12 = call i32 () @random_i32()
+  %arg13 = call double () @random_double()
+  %arg14 = call i64 () @random_i64()
+  %1 = call i64 (i64, ...) @foo(i64 1, i32 zeroext %arg2, float %arg3,
+                                i32 signext %arg4, double %arg5, i64 %arg6,
+                                i64 7, double 8.0, i32 zeroext %arg9,
+                                double 10.0, float %arg11, i32 signext %arg12,
+                                double %arg13, i64 %arg14)
+  ret i64 %1
+}
+
+attributes #1 = { sanitize_memory }
+
+; In kernel the floating point values are passed in GPRs:
+; - r2@16              == i64 1            - skipped, because it's fixed
+; - r3@24              == i32 zext %arg2   - shadow is zero-extended
+; - r4@(32 + 4)        == float %arg3      - right-justified, shadow is 32-bit
+; - r5@40              == i32 sext %arg4   - shadow is sign-extended
+; - r6@48              == double %arg5     - straightforward
+; - overflow@160       == i64 %arg6        - straightforward
+; - overflow@168       == 7                - filler
+; - overflow@176       == 8.0              - filler
+; - overflow@184       == i32 zext %arg9   - shadow is zero-extended
+; - overflow@192       == 10.0             - filler
+; - overflow@(200 + 4) == float %arg11     - right-justified, shadow is 32-bit
+; - overflow@208       == i32 sext %arg12  - shadow is sign-extended
+; - overflow@216       == double %arg13    - straightforward
+; - overflow@224       == i64 %arg14       - straightforward
+; Overflow arg area size is 72.
+
+; CHECK-LABEL: @bar
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 24
+; CHECK: [[V:%.*]] = zext {{.*}}
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[V]], {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 36
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i32*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 40
+; CHECK: [[V:%.*]] = sext {{.*}}
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[V]], {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 48
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 160
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 168
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 176
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 184
+; CHECK: [[V:%.*]] = zext {{.*}}
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[V]], {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 192
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 204
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i32*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 208
+; CHECK: [[V:%.*]] = sext {{.*}}
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[V]], {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 216
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK: [[B:%.*]] = ptrtoint [100 x i64]* %va_arg_shadow to i64
+; CHECK: [[S:%.*]] = add i64 [[B]], 224
+; CHECK: [[M:%_msarg_va_s.*]] = inttoptr i64 [[S]] to i64*
+; CHECK: store {{.*}} [[M]]
+
+; CHECK store {{.*}} 72, {{.*}} %va_arg_overflow_size
diff --git a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg.ll b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg.ll
@@ -0,0 +1,189 @@
+; RUN: opt < %s -S -march=z13 -passes=msan 2>&1 | FileCheck %s
+; RUN: opt < %s -msan -S -march=z13 | FileCheck %s
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+%struct.__va_list = type { i64, i64, i8*, i8* }
+
+define i64 @foo(i64 %guard, ...) {
+  %vl = alloca %struct.__va_list, align 8
+  %1 = bitcast %struct.__va_list* %vl to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %1)
+  call void @llvm.va_start(i8* %1)
+  call void @llvm.va_end(i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* %1)
+  ret i64 0
+}
+
+; First check if the variadic shadow values are saved in stack with correct
+; size (which is 160 - size of the register save area).
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 160, [[A]]
+; CHECK: alloca {{.*}} [[B]]
+
+; We expect two memcpy operations: one for the register save area, and one for
+; the overflow arg area.
+
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 {{%.*}}, i8* align 8 {{%.*}}, i64 160, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 {{%.*}}, i8* align 8 {{%.*}}, i64 [[A]], i1 false)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+declare void @llvm.va_start(i8*) #2
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+declare i32 @random_i32()
+declare i64 @random_i64()
+declare float @random_float()
+declare double @random_double()
+
+define i64 @bar() {
+  %arg2 = call i32 () @random_i32()
+  %arg3 = call float () @random_float()
+  %arg4 = call i32 () @random_i32()
+  %arg5 = call double () @random_double()
+  %arg6 = call i64 () @random_i64()
+  %arg9 = call i32 () @random_i32()
+  %arg11 = call float () @random_float()
+  %arg12 = call i32 () @random_i32()
+  %arg13 = call double () @random_double()
+  %arg14 = call i64 () @random_i64()
+  %1 = call i64 (i64, ...) @foo(i64 1, i32 zeroext %arg2, float %arg3,
+                                i32 signext %arg4, double %arg5, i64 %arg6,
+                                i64 7, double 8.0, i32 zeroext %arg9,
+                                double 10.0, float %arg11, i32 signext %arg12,
+                                double %arg13, i64 %arg14)
+  ret i64 %1
+}
+
+; Save the incoming shadow values from the varargs in the __msan_va_arg_tls
+; array at the offsets equal to those defined by the ABI for the corresponding
+; registers in the register save area, and for the corresponding overflow args
+; in the overflow arg area:
+; - r2@16              == i64 1            - skipped, because it's fixed
+; - r3@24              == i32 zext %arg2   - shadow is zero-extended
+; - f0@128             == float %arg3      - left-justified, shadow is 32-bit
+; - r4@32              == i32 sext %arg4   - shadow is sign-extended
+; - f2@136             == double %arg5     - straightforward
+; - r5@40              == i64 %arg6        - straightforward
+; - r6@48              == 7                - filler
+; - f4@144             == 8.0              - filler
+; - overflow@160       == i32 zext %arg9   - shadow is zero-extended
+; - f6@152             == 10.0             - filler
+; - overflow@(168 + 4) == float %arg11     - right-justified, shadow is 32-bit
+; - overflow@176       == i32 sext %arg12  - shadow is sign-extended
+; - overflow@184       == double %arg13    - straightforward
+; - overflow@192       == i64 %arg14       - straightforward
+; Overflow arg area size is 40.
+
+; CHECK-LABEL: @bar
+
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 24
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 128
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 32
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 136
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 40
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 48
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 144
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 160
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 152
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 172
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 176
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 184
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 192
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
+; passed to a variadic function.
+
+define dso_local i64 @many_args() {
+entry:
+  %ret = call i64 (i64, ...) @sum(i64 120,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+    i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1
+  )
+  ret i64 %ret
+}
+
+; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed.
+; CHECK-LABEL: @many_args
+; CHECK: i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 792)
+; CHECK-NOT: i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 800)
+
+declare i64 @sum(i64 %n, ...)
+
+; Test offset calculation for vector arguments.
+; Regardless of whether or not fixed args overflow, we should copy a shadow
+; for a single vector vararg to offset 160.
+
+declare void @vr_no_overflow(<4 x float> %v24, <4 x float> %v26,
+                             <4 x float> %v28, <4 x float> %v30,
+                             <4 x float> %v25, <4 x float> %v27,
+                             <4 x float> %v29, ...)
+
+declare <4 x float> @vr_value()
+
+define void @vr_no_overflow_caller() {
+  %1 = call <4 x float> () @vr_value()
+  call void (<4 x float>, <4 x float>, <4 x float>,
+             <4 x float>, <4 x float>, <4 x float>,
+             <4 x float>, ...) @vr_no_overflow(
+    <4 x float> %1, <4 x float> %1, <4 x float> %1, <4 x float> %1,
+    <4 x float> %1, <4 x float> %1, <4 x float> %1, <4 x float> %1)
+  ret void
+}
+
+; CHECK-LABEL: @vr_no_overflow_caller
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 160
+; CHECK-NOT: store {{.*}} @__msan_va_arg_tls {{.*}}
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+declare void @vr_overflow(<4 x float> %v24, <4 x float> %v26,
+                          <4 x float> %v28, <4 x float> %v30,
+                          <4 x float> %v25, <4 x float> %v27,
+                          <4 x float> %v29, <4 x float> %v31,
+                          <4 x float> %overflow, ...)
+
+define void @vr_overflow_caller() {
+  %1 = call <4 x float> @vr_value()
+  call void (<4 x float>, <4 x float>, <4 x float>,
+             <4 x float>, <4 x float>, <4 x float>,
+             <4 x float>, <4 x float>, <4 x float>,
+             ...) @vr_overflow(
+    <4 x float> %1, <4 x float> %1, <4 x float> %1, <4 x float> %1,
+    <4 x float> %1, <4 x float> %1, <4 x float> %1, <4 x float> %1,
+    <4 x float> %1, <4 x float> %1)
+  ret void
+}
+
+; CHECK-LABEL: @vr_overflow_caller
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 160
+; CHECK-NOT: store {{.*}} @__msan_va_arg_tls {{.*}}
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Test that fp128 is passed by reference.
+
+declare fp128 @random_fp128()
+
+define i64 @bar_fp128() {
+  %arg = call fp128 @random_fp128()
+  %1 = call i64 (i64, ...) @foo(i64 1, fp128 %arg)
+  ret i64 %1
+}
+
+; CHECK-LABEL: @bar_fp128
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 24
+; CHECK: store {{.*}} 0, {{.*}} @__msan_va_arg_overflow_size_tls
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -4,6 +4,8 @@
 ; RUN:   FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s
 ; RUN: opt < %s -passes='module(msan-module),function(msan)' -msan-check-access-address=0 -msan-track-origins=1 -S | \
 ; RUN:   FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s
+; RUN: opt < %s -passes='module(msan-module),function(msan)' -msan-instrumentation-with-call-threshold=0 -msan-track-origins=1 -S | \
+; RUN:   FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK-CALLS %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -1007,3 +1009,12 @@
 
 ; CHECK-LABEL: define internal void @msan.module_ctor() {
 ; CHECK: call void @__msan_init()
+
+; CHECK-CALLS: declare void @__msan_maybe_warning_1(i8 zeroext, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_store_origin_1(i8 zeroext, i8*, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_warning_2(i16 zeroext, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_store_origin_2(i16 zeroext, i8*, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_warning_4(i32 zeroext, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_store_origin_4(i32 zeroext, i8*, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_warning_8(i64 zeroext, i32 zeroext)
+; CHECK-CALLS: declare void @__msan_maybe_store_origin_8(i64 zeroext, i8*, i32 zeroext)