diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -407,8 +407,8 @@
       }
     )cpp",
       R"cpp(
-      template<typename $TemplateParameter[[T]],
-        void ($TemplateParameter[[T]]::*$TemplateParameter[[method]])(int)>
+      template<typename $TemplateParameter[[T]], 
+        void (T::*$TemplateParameter[[method]])(int)>
       struct $Class[[G]] {
         void $Method[[foo]](
             $TemplateParameter[[T]] *$Parameter[[O]]) {
diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/AST/ASTConcept.h
@@ -0,0 +1,80 @@
+//===--- ASTConcept.h - Concepts Related AST Data Structures ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides AST data structures related to concepts.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_ASTCONCEPT_H
+#define LLVM_CLANG_AST_ASTCONCEPT_H
+#include "clang/AST/Expr.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <utility>
+namespace clang {
+
+/// \brief The result of a constraint satisfaction check, containing the
+/// necessary information to diagnose an unsatisfied constraint.
+struct ConstraintSatisfaction {
+  using SubstitutionDiagnostic = std::pair<SourceLocation, StringRef>;
+  using Detail = llvm::PointerUnion<Expr *, SubstitutionDiagnostic *>;
+
+  bool IsSatisfied = false;
+
+  /// \brief Pairs of unsatisfied atomic constraint expressions along with the
+  /// substituted constraint expr, if the template arguments could be
+  /// substituted into them, or a diagnostic if substitution resulted in an
+  /// invalid expression.
+  llvm::SmallVector<std::pair<const Expr *, Detail>, 4> Details;
+
+  // This can leak if used in an AST node, use ASTConstraintSatisfaction
+  // instead.
+  void *operator new(size_t bytes, ASTContext &C) = delete;
+};
+
+/// Pairs of unsatisfied atomic constraint expressions along with the
+/// substituted constraint expr, if the template arguments could be
+/// substituted into them, or a diagnostic if substitution resulted in
+/// an invalid expression.
+using UnsatisfiedConstraintRecord =
+    std::pair<const Expr *,
+              llvm::PointerUnion<Expr *,
+                                 std::pair<SourceLocation, StringRef> *>>;
+
+/// \brief The result of a constraint satisfaction check, containing the
+/// necessary information to diagnose an unsatisfied constraint.
+///
+/// This is safe to store in an AST node, as opposed to ConstraintSatisfaction.
+struct ASTConstraintSatisfaction final :
+    llvm::TrailingObjects<ASTConstraintSatisfaction,
+                          UnsatisfiedConstraintRecord> {
+  std::size_t NumRecords;
+  bool IsSatisfied : 1;
+
+  const UnsatisfiedConstraintRecord *begin() const {
+    return getTrailingObjects<UnsatisfiedConstraintRecord>();
+  }
+
+  const UnsatisfiedConstraintRecord *end() const {
+    return getTrailingObjects<UnsatisfiedConstraintRecord>() + NumRecords;
+  }
+
+  ASTConstraintSatisfaction(const ASTContext &C,
+                            const ConstraintSatisfaction &Satisfaction);
+
+  static ASTConstraintSatisfaction *
+  Create(const ASTContext &C, const ConstraintSatisfaction &Satisfaction);
+};
+
+} // clang
+
+#endif // LLVM_CLANG_AST_ASTCONCEPT_H
\ No newline at end of file
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_AST_EXPRCXX_H
 #define LLVM_CLANG_AST_EXPRCXX_H
 
+#include "clang/AST/ASTConcept.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
@@ -4845,6 +4846,10 @@
                                     TemplateArgument> {
   friend class ASTStmtReader;
   friend TrailingObjects;
+public:
+  using SubstitutionDiagnostic = std::pair<SourceLocation, std::string>;
+
+protected:
 
   // \brief The optional nested name specifier used when naming the concept.
   NestedNameSpecifierLoc NestedNameSpec;
@@ -4862,11 +4867,8 @@
   /// through a UsingShadowDecl.
   NamedDecl *FoundDecl;
 
-  /// \brief The concept named, and whether or not the concept with the given
-  /// arguments was satisfied when the expression was created.
-  /// If any of the template arguments are dependent (this expr would then be
-  /// isValueDependent()), this bit is to be ignored.
-  llvm::PointerIntPair<ConceptDecl *, 1, bool> NamedConcept;
+  /// \brief The concept named.
+  ConceptDecl *NamedConcept;
 
   /// \brief The template argument list source info used to specialize the
   /// concept.
@@ -4876,13 +4878,18 @@
   /// converted template arguments.
   unsigned NumTemplateArgs;
 
+  /// \brief Information about the satisfaction of the named concept with the
+  /// given arguments. If this expression is value dependent, this is to be
+  /// ignored.
+  ASTConstraintSatisfaction *Satisfaction;
+
   ConceptSpecializationExpr(ASTContext &C, NestedNameSpecifierLoc NNS,
                             SourceLocation TemplateKWLoc,
                             SourceLocation ConceptNameLoc, NamedDecl *FoundDecl,
                             ConceptDecl *NamedConcept,
                             const ASTTemplateArgumentListInfo *ArgsAsWritten,
                             ArrayRef<TemplateArgument> ConvertedArgs,
-                            Optional<bool> IsSatisfied);
+                            const ConstraintSatisfaction *Satisfaction);
 
   ConceptSpecializationExpr(EmptyShell Empty, unsigned NumTemplateArgs);
 
@@ -4893,7 +4900,8 @@
          SourceLocation TemplateKWLoc, SourceLocation ConceptNameLoc,
          NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
          const ASTTemplateArgumentListInfo *ArgsAsWritten,
-         ArrayRef<TemplateArgument> ConvertedArgs, Optional<bool> IsSatisfied);
+         ArrayRef<TemplateArgument> ConvertedArgs,
+         const ConstraintSatisfaction *Satisfaction);
 
   static ConceptSpecializationExpr *
   Create(ASTContext &C, EmptyShell Empty, unsigned NumTemplateArgs);
@@ -4907,7 +4915,7 @@
   }
 
   ConceptDecl *getNamedConcept() const {
-    return NamedConcept.getPointer();
+    return NamedConcept;
   }
 
   ArrayRef<TemplateArgument> getTemplateArguments() const {
@@ -4924,12 +4932,21 @@
                             ArrayRef<TemplateArgument> Converted);
 
   /// \brief Whether or not the concept with the given arguments was satisfied
-  /// when the expression was created. This method assumes that the expression
-  /// is not dependent!
+  /// when the expression was created.
+  /// The expression must not be dependent.
   bool isSatisfied() const {
     assert(!isValueDependent()
            && "isSatisfied called on a dependent ConceptSpecializationExpr");
-    return NamedConcept.getInt();
+    return Satisfaction->IsSatisfied;
+  }
+
+  /// \brief Get elaborated satisfaction info about the template arguments'
+  /// satisfaction of the named concept.
+  /// The expression must not be dependent.
+  const ASTConstraintSatisfaction &getSatisfaction() const {
+    assert(!isValueDependent()
+           && "getSatisfaction called on dependent ConceptSpecializationExpr");
+    return *Satisfaction;
   }
 
   SourceLocation getConceptNameLoc() const { return ConceptNameLoc; }
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -1162,12 +1162,11 @@
 DEF_TRAVERSE_TYPELOC(RValueReferenceType,
                      { TRY_TO(TraverseTypeLoc(TL.getPointeeLoc())); })
 
+// FIXME: location of base class?
 // We traverse this in the type case as well, but how is it not reached through
 // the pointee type?
 DEF_TRAVERSE_TYPELOC(MemberPointerType, {
-  auto *TSI = TL.getClassTInfo();
-  assert(TSI);
-  TRY_TO(TraverseTypeLoc(TSI->getTypeLoc()));
+  TRY_TO(TraverseType(QualType(TL.getTypePtr()->getClass(), 0)));
   TRY_TO(TraverseTypeLoc(TL.getPointeeLoc()));
 })
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2572,6 +2572,26 @@
   "expression">;
 def err_non_bool_atomic_constraint : Error<
   "atomic constraint must be of type 'bool' (found %0)">;
+def err_template_arg_list_constraints_not_satisfied : Error<
+  "constraints not satisfied for %select{class template|function template|variable template|alias template|"
+  "template template parameter|template}0 %1%2">;
+def note_constraints_not_satisfied : Note<
+  "constraints not satisfied">;
+def note_substituted_constraint_expr_is_ill_formed : Note<
+  "because substituted constraint expression is ill-formed%0">;
+def note_atomic_constraint_evaluated_to_false : Note<
+  "%select{and |because }0'%1' evaluated to false">;
+def note_concept_specialization_constraint_evaluated_to_false : Note<
+  "%select{and |because }0'%1' evaluated to false">;
+def note_single_arg_concept_specialization_constraint_evaluated_to_false : Note<
+  "%select{and |because }0%1 does not satisfy %2">;
+def note_atomic_constraint_evaluated_to_false_elaborated : Note<
+  "%select{and |because }0'%1' (%2 %3 %4) evaluated to false">;
+def err_could_not_normalize_ill_formed_constraint : Error<
+  "required expansion of concept specialization %0 failed, substituted "
+  "expression would be illegal">;
+def note_could_not_normalize_ill_formed_constraint_reason : Note<
+  "because: %0">;
 
 def err_template_different_requires_clause : Error<
   "requires clause differs in template redeclaration">;
@@ -3861,6 +3881,8 @@
 def note_ovl_candidate_explicit_arg_mismatch_named : Note<
     "candidate template ignored: invalid explicitly-specified argument "
     "for template parameter %0">;
+def note_ovl_candidate_unsatisfied_constraints : Note<
+    "candidate template ignored: constraints not satisfied%0">;
 def note_ovl_candidate_explicit_arg_mismatch_unnamed : Note<
     "candidate template ignored: invalid explicitly-specified argument "
     "for %ordinal0 template parameter">;
@@ -4553,6 +4575,14 @@
   "while checking a default template argument used here">;
 def note_concept_specialization_here : Note<
   "while checking the satisfaction of concept '%0' requested here">;
+def note_checking_constraints_for_template_id_here : Note<
+  "while checking constraint satisfaction for template '%0' required here">;
+def note_checking_constraints_for_var_spec_id_here : Note<
+  "while checking constraint satisfaction for variable template "
+  "partial specialization '%0' required here">;
+def note_checking_constraints_for_class_spec_id_here : Note<
+  "while checking constraint satisfaction for class template partial "
+  "specialization '%0' required here">;
 def note_constraint_substitution_here : Note<
   "while substituting template arguments into constraint expression here">;
 def note_instantiation_contexts_suppressed : Note<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_SEMA_SEMA_H
 #define LLVM_CLANG_SEMA_SEMA_H
 
+#include "clang/AST/ASTConcept.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Availability.h"
 #include "clang/AST/ComparisonCategories.h"
@@ -6140,10 +6141,45 @@
   /// A diagnostic is emitted if it is not, and false is returned.
   bool CheckConstraintExpression(Expr *CE);
 
-  bool CalculateConstraintSatisfaction(ConceptDecl *NamedConcept,
-                                       MultiLevelTemplateArgumentList &MLTAL,
-                                       Expr *ConstraintExpr,
-                                       bool &IsSatisfied);
+  /// \brief Check whether the given list of constraint expressions are
+  /// satisfied (as if in a 'conjunction') given template arguments.
+  /// \param ConstraintExprs a list of constraint expressions, treated as if
+  /// they were 'AND'ed together.
+  /// \param TemplateArgs the list of template arguments to substitute into the
+  /// constraint expression.
+  /// \param TemplateIDRange The source range of the template id that
+  /// caused the constraints check.
+  /// \param Satisfaction if true is returned, will contain details of the
+  /// satisfaction, with enough information to diagnose an unsatisfied
+  /// expression.
+  /// \returns true if an error occurred and satisfaction could not be checked,
+  /// false otherwise.
+  bool CheckConstraintSatisfaction(TemplateDecl *Template,
+                                   ArrayRef<const Expr *> ConstraintExprs,
+                                   ArrayRef<TemplateArgument> TemplateArgs,
+                                   SourceRange TemplateIDRange,
+                                   ConstraintSatisfaction &Satisfaction);
+
+  bool CheckConstraintSatisfaction(ClassTemplatePartialSpecializationDecl *TD,
+                                   ArrayRef<const Expr *> ConstraintExprs,
+                                   ArrayRef<TemplateArgument> TemplateArgs,
+                                   SourceRange TemplateIDRange,
+                                   ConstraintSatisfaction &Satisfaction);
+
+  bool CheckConstraintSatisfaction(VarTemplatePartialSpecializationDecl *TD,
+                                   ArrayRef<const Expr *> ConstraintExprs,
+                                   ArrayRef<TemplateArgument> TemplateArgs,
+                                   SourceRange TemplateIDRange,
+                                   ConstraintSatisfaction &Satisfaction);
+
+  /// \brief Check whether the given non-dependent constraint expression is
+  /// satisfied. Returns false and updates Satisfaction with the satisfaction
+  /// verdict if successful, emits a diagnostic and returns true if an error
+  /// occured and satisfaction could not be determined.
+  ///
+  /// \returns true if an error occurred, false otherwise.
+  bool CheckConstraintSatisfaction(const Expr *ConstraintExpr,
+                                   ConstraintSatisfaction &Satisfaction);
 
   /// Check that the associated constraints of a template declaration match the
   /// associated constraints of an older declaration of which it is a
@@ -6151,6 +6187,38 @@
   bool CheckRedeclarationConstraintMatch(TemplateParameterList *Old,
                                          TemplateParameterList *New);
 
+  /// \brief Ensure that the given template arguments satisfy the constraints
+  /// associated with the given template, emitting a diagnostic if they do not.
+  ///
+  /// \param Template The template to which the template arguments are being
+  /// provided.
+  ///
+  /// \param TemplateArgs The converted, canonicalized template arguments.
+  ///
+  /// \param TemplateIDRange The source range of the template id that
+  /// caused the constraints check.
+  ///
+  /// \returns true if the constrains are not satisfied or could not be checked
+  /// for satisfaction, false if the constraints are satisfied.
+  bool EnsureTemplateArgumentListConstraints(TemplateDecl *Template,
+                                       ArrayRef<TemplateArgument> TemplateArgs,
+                                             SourceRange TemplateIDRange);
+
+  /// \brief Emit diagnostics explaining why a constraint expression was deemed
+  /// unsatisfied.
+  void
+  DiagnoseUnsatisfiedConstraint(const ConstraintSatisfaction& Satisfaction);
+
+  /// \brief Emit diagnostics explaining why a constraint expression was deemed
+  /// unsatisfied.
+  void
+  DiagnoseUnsatisfiedConstraint(const ASTConstraintSatisfaction& Satisfaction);
+
+  /// \brief Emit diagnostics explaining why a constraint expression was deemed
+  /// unsatisfied because it was ill-formed.
+  void DiagnoseUnsatisfiedIllFormedConstraint(SourceLocation DiagnosticLocation,
+                                              StringRef Diagnostic);
+
   // ParseObjCStringLiteral - Parse Objective-C string literals.
   ExprResult ParseObjCStringLiteral(SourceLocation *AtLocs,
                                     ArrayRef<Expr *> Strings);
@@ -6966,13 +7034,18 @@
   /// contain the converted forms of the template arguments as written.
   /// Otherwise, \p TemplateArgs will not be modified.
   ///
+  /// \param ConstraintsNotSatisfied If provided, and an error occured, will
+  /// receive true if the cause for the error is the associated constraints of
+  /// the template not being satisfied by the template arguments.
+  ///
   /// \returns true if an error occurred, false otherwise.
   bool CheckTemplateArgumentList(TemplateDecl *Template,
                                  SourceLocation TemplateLoc,
                                  TemplateArgumentListInfo &TemplateArgs,
                                  bool PartialTemplateArgs,
                                  SmallVectorImpl<TemplateArgument> &Converted,
-                                 bool UpdateArgsWithConversions = true);
+                                 bool UpdateArgsWithConversions = true,
+                                 bool *ConstraintsNotSatisfied = nullptr);
 
   bool CheckTemplateTypeArgument(TemplateTypeParmDecl *Param,
                                  TemplateArgumentLoc &Arg,
@@ -7514,6 +7587,9 @@
     TDK_InvalidExplicitArguments,
     /// Checking non-dependent argument conversions failed.
     TDK_NonDependentConversionFailure,
+    /// The deduced arguments did not satisfy the constraints associated
+    /// with the template.
+    TDK_ConstraintsNotSatisfied,
     /// Deduction failed; that's all we know.
     TDK_MiscellaneousDeductionFailure,
     /// CUDA Target attributes do not match.
@@ -8026,7 +8102,7 @@
     /// constrained entity (a concept declaration or a template with associated
     /// constraints).
     InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
-                          ConstraintsCheck, TemplateDecl *Template,
+                          ConstraintsCheck, NamedDecl *Template,
                           ArrayRef<TemplateArgument> TemplateArgs,
                           SourceRange InstantiationRange);
 
@@ -8035,7 +8111,7 @@
     /// with a template declaration or as part of the satisfaction check of a
     /// concept.
     InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation,
-                          ConstraintSubstitution, TemplateDecl *Template,
+                          ConstraintSubstitution, NamedDecl *Template,
                           sema::TemplateDeductionInfo &DeductionInfo,
                           SourceRange InstantiationRange);
 
diff --git a/clang/include/clang/Sema/TemplateDeduction.h b/clang/include/clang/Sema/TemplateDeduction.h
--- a/clang/include/clang/Sema/TemplateDeduction.h
+++ b/clang/include/clang/Sema/TemplateDeduction.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_CLANG_SEMA_TEMPLATEDEDUCTION_H
 #define LLVM_CLANG_SEMA_TEMPLATEDEDUCTION_H
 
+#include "clang/Sema/Ownership.h"
+#include "clang/AST/ASTConcept.h"
 #include "clang/AST/DeclAccessPair.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/TemplateBase.h"
@@ -218,6 +220,10 @@
   ///
   /// FIXME: This should be kept internal to SemaTemplateDeduction.
   SmallVector<DeducedPack *, 8> PendingDeducedPacks;
+
+  /// \brief The constraint satisfaction details resulting from the associated
+  /// constraints satisfaction tests.
+  ConstraintSatisfaction AssociatedConstraintsSatisfaction;
 };
 
 } // namespace sema
diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/AST/ASTConcept.cpp
@@ -0,0 +1,55 @@
+//===--- ASTConcept.cpp - Concepts Related AST Data Structures --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines AST data structures related to concepts.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/ASTConcept.h"
+#include "clang/AST/ASTContext.h"
+using namespace clang;
+
+ASTConstraintSatisfaction::ASTConstraintSatisfaction(const ASTContext &C,
+    const ConstraintSatisfaction &Satisfaction):
+    NumRecords{Satisfaction.Details.size()},
+    IsSatisfied{Satisfaction.IsSatisfied} {
+  for (unsigned I = 0; I < NumRecords; ++I) {
+    auto &Detail = Satisfaction.Details[I];
+    if (Detail.second.is<Expr *>())
+      new (getTrailingObjects<UnsatisfiedConstraintRecord>() + I)
+         UnsatisfiedConstraintRecord{Detail.first,
+                                     UnsatisfiedConstraintRecord::second_type(
+                                         Detail.second.get<Expr *>())};
+    else {
+      auto &SubstitutionDiagnostic =
+          *Detail.second.get<std::pair<SourceLocation, StringRef> *>();
+      unsigned MessageSize = SubstitutionDiagnostic.second.size();
+      char *Mem = new (C) char[MessageSize];
+      memcpy(Mem, SubstitutionDiagnostic.second.data(), MessageSize);
+      auto *NewSubstDiag = new (C) std::pair<SourceLocation, StringRef>(
+          SubstitutionDiagnostic.first, StringRef(Mem, MessageSize));
+      new (getTrailingObjects<UnsatisfiedConstraintRecord>() + I)
+         UnsatisfiedConstraintRecord{Detail.first,
+                                     UnsatisfiedConstraintRecord::second_type(
+                                         NewSubstDiag)};
+    }
+  }
+}
+
+
+ASTConstraintSatisfaction *
+ASTConstraintSatisfaction::Create(const ASTContext &C,
+                                  const ConstraintSatisfaction &Satisfaction) {
+  std::size_t size =
+      totalSizeToAlloc<UnsatisfiedConstraintRecord>(
+          Satisfaction.Details.size());
+  void *Mem = C.Allocate(size, alignof(ASTConstraintSatisfaction));
+  return new (Mem) ASTConstraintSatisfaction(C, Satisfaction);
+}
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -14,6 +14,7 @@
 #include "CXXABI.h"
 #include "Interp/Context.h"
 #include "clang/AST/APValue.h"
+#include "clang/AST/ASTConcept.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Attr.h"
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -14,6 +14,7 @@
 
 add_clang_library(clangAST
   APValue.cpp
+  ASTConcept.cpp
   ASTConsumer.cpp
   ASTContext.cpp
   ASTDiagnostic.cpp
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4606,7 +4606,7 @@
 }
 
 void LabelDecl::setMSAsmLabel(StringRef Name) {
-  char *Buffer = new (getASTContext(), 1) char[Name.size() + 1];
+char *Buffer = new (getASTContext(), 1) char[Name.size() + 1];
   memcpy(Buffer, Name.data(), Name.size());
   Buffer[Name.size()] = '\0';
   MSAsmName = Buffer;
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
--- a/clang/lib/AST/ExprCXX.cpp
+++ b/clang/lib/AST/ExprCXX.cpp
@@ -1769,18 +1769,19 @@
     NestedNameSpecifierLoc NNS, SourceLocation TemplateKWLoc,
     SourceLocation ConceptNameLoc, NamedDecl *FoundDecl,
     ConceptDecl *NamedConcept, const ASTTemplateArgumentListInfo *ArgsAsWritten,
-    ArrayRef<TemplateArgument> ConvertedArgs, Optional<bool> IsSatisfied)
+    ArrayRef<TemplateArgument> ConvertedArgs,
+    const ConstraintSatisfaction *Satisfaction)
     : Expr(ConceptSpecializationExprClass, C.BoolTy, VK_RValue, OK_Ordinary,
            /*TypeDependent=*/false,
            // All the flags below are set in setTemplateArguments.
-           /*ValueDependent=*/!IsSatisfied.hasValue(),
-           /*InstantiationDependent=*/false,
+           /*ValueDependent=*/!Satisfaction, /*InstantiationDependent=*/false,
            /*ContainsUnexpandedParameterPacks=*/false),
       NestedNameSpec(NNS), TemplateKWLoc(TemplateKWLoc),
       ConceptNameLoc(ConceptNameLoc), FoundDecl(FoundDecl),
-      NamedConcept(NamedConcept, IsSatisfied ? *IsSatisfied : true),
-      NumTemplateArgs(ConvertedArgs.size()) {
-
+      NamedConcept(NamedConcept), NumTemplateArgs(ConvertedArgs.size()),
+      Satisfaction(Satisfaction ?
+                   ASTConstraintSatisfaction::Create(C, *Satisfaction) :
+                   nullptr) {
   setTemplateArguments(ArgsAsWritten, ConvertedArgs);
 }
 
@@ -1827,13 +1828,13 @@
                                   ConceptDecl *NamedConcept,
                                const ASTTemplateArgumentListInfo *ArgsAsWritten,
                                   ArrayRef<TemplateArgument> ConvertedArgs,
-                                  Optional<bool> IsSatisfied) {
+                                  const ConstraintSatisfaction *Satisfaction) {
   void *Buffer = C.Allocate(totalSizeToAlloc<TemplateArgument>(
                                 ConvertedArgs.size()));
   return new (Buffer) ConceptSpecializationExpr(C, NNS, TemplateKWLoc,
                                                 ConceptNameLoc, FoundDecl,
                                                 NamedConcept, ArgsAsWritten,
-                                                ConvertedArgs, IsSatisfied);
+                                                ConvertedArgs, Satisfaction);
 }
 
 ConceptSpecializationExpr *
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -748,6 +748,7 @@
   ApplyDebugLocation(ApplyDebugLocation &&Other) : CGF(Other.CGF) {
     Other.CGF = nullptr;
   }
+  ApplyDebugLocation &operator=(ApplyDebugLocation &&) = default;
 
   ~ApplyDebugLocation();
 
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -981,7 +981,7 @@
 
   generateObjCGetterBody(IMP, PID, OMD, AtomicHelperFn);
 
-  FinishFunction();
+  FinishFunction(OMD->getEndLoc());
 }
 
 static bool hasTrivialGetExpr(const ObjCPropertyImplDecl *propImpl) {
@@ -1515,7 +1515,7 @@
 
   generateObjCSetterBody(IMP, PID, AtomicHelperFn);
 
-  FinishFunction();
+  FinishFunction(OMD->getEndLoc());
 }
 
 namespace {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6189,7 +6189,7 @@
   // Emit the finalizer body:
   // <destroy>(<type>* %0)
   RCG.emitCleanups(CGF, N, PrivateAddr);
-  CGF.FinishFunction();
+  CGF.FinishFunction(Loc);
   return Fn;
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -377,9 +377,15 @@
   if (HasCleanups) {
     // Make sure the line table doesn't jump back into the body for
     // the ret after it's been at EndLoc.
-    if (CGDebugInfo *DI = getDebugInfo())
+    Optional<ApplyDebugLocation> AL;
+    if (CGDebugInfo *DI = getDebugInfo()) {
       if (OnlySimpleReturnStmts)
         DI->EmitLocation(Builder, EndLoc);
+      else
+        // We may not have a valid end location. Try to apply it anyway, and
+        // fall back to an artificial location if needed.
+        AL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc);
+    }
 
     PopCleanupBlocks(PrologueCleanupDepth);
   }
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -12,10 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "clang/Sema/TemplateDeduction.h"
 #include "clang/Sema/Template.h"
 #include "clang/AST/ExprCXX.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
 using namespace clang;
 using namespace sema;
 
@@ -46,80 +49,369 @@
   return true;
 }
 
-bool
-Sema::CalculateConstraintSatisfaction(ConceptDecl *NamedConcept,
-                                      MultiLevelTemplateArgumentList &MLTAL,
-                                      Expr *ConstraintExpr,
-                                      bool &IsSatisfied) {
+template <typename AtomicEvaluator>
+static bool
+calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr,
+                                ConstraintSatisfaction &Satisfaction,
+                                AtomicEvaluator &&Evaluator) {
   ConstraintExpr = ConstraintExpr->IgnoreParenImpCasts();
 
   if (auto *BO = dyn_cast<BinaryOperator>(ConstraintExpr)) {
-    if (BO->getOpcode() == BO_LAnd) {
-      if (CalculateConstraintSatisfaction(NamedConcept, MLTAL, BO->getLHS(),
-                                          IsSatisfied))
+    if (BO->getOpcode() == BO_LAnd || BO->getOpcode() == BO_LOr) {
+      if (calculateConstraintSatisfaction(S, BO->getLHS(), Satisfaction,
+                                          Evaluator))
         return true;
-      if (!IsSatisfied)
+
+      bool IsLHSSatisfied = Satisfaction.IsSatisfied;
+
+      if (BO->getOpcode() == BO_LOr && IsLHSSatisfied)
+        // [temp.constr.op] p3
+        //    A disjunction is a constraint taking two operands. To determine if
+        //    a disjunction is satisfied, the satisfaction of the first operand
+        //    is checked. If that is satisfied, the disjunction is satisfied.
+        //    Otherwise, the disjunction is satisfied if and only if the second
+        //    operand is satisfied.
         return false;
-      return CalculateConstraintSatisfaction(NamedConcept, MLTAL, BO->getRHS(),
-                                             IsSatisfied);
-    } else if (BO->getOpcode() == BO_LOr) {
-      if (CalculateConstraintSatisfaction(NamedConcept, MLTAL, BO->getLHS(),
-                                          IsSatisfied))
-        return true;
-      if (IsSatisfied)
+
+      if (BO->getOpcode() == BO_LAnd && !IsLHSSatisfied)
+        // [temp.constr.op] p2
+        //    A conjunction is a constraint taking two operands. To determine if
+        //    a conjunction is satisfied, the satisfaction of the first operand
+        //    is checked. If that is not satisfied, the conjunction is not
+        //    satisfied. Otherwise, the conjunction is satisfied if and only if
+        //    the second operand is satisfied.
         return false;
-      return CalculateConstraintSatisfaction(NamedConcept, MLTAL, BO->getRHS(),
-                                             IsSatisfied);
+
+      return calculateConstraintSatisfaction(S, BO->getRHS(), Satisfaction,
+          std::forward<AtomicEvaluator>(Evaluator));
     }
   }
   else if (auto *C = dyn_cast<ExprWithCleanups>(ConstraintExpr))
-    return CalculateConstraintSatisfaction(NamedConcept, MLTAL, C->getSubExpr(),
-                                           IsSatisfied);
+    return calculateConstraintSatisfaction(S, C->getSubExpr(), Satisfaction,
+        std::forward<AtomicEvaluator>(Evaluator));
 
-  EnterExpressionEvaluationContext ConstantEvaluated(
-      *this, Sema::ExpressionEvaluationContext::ConstantEvaluated);
-
-  // Atomic constraint - substitute arguments and check satisfaction.
-  ExprResult E;
-  {
-    TemplateDeductionInfo Info(ConstraintExpr->getBeginLoc());
-    InstantiatingTemplate Inst(*this, ConstraintExpr->getBeginLoc(),
-                               InstantiatingTemplate::ConstraintSubstitution{},
-                               NamedConcept, Info,
-                               ConstraintExpr->getSourceRange());
-    if (Inst.isInvalid())
-      return true;
-    // We do not want error diagnostics escaping here.
-    Sema::SFINAETrap Trap(*this);
+  // An atomic constraint expression
+  ExprResult SubstitutedAtomicExpr = Evaluator(ConstraintExpr);
 
-    E = SubstExpr(ConstraintExpr, MLTAL);
-    if (E.isInvalid() || Trap.hasErrorOccurred()) {
-      // C++2a [temp.constr.atomic]p1
-      //   ...If substitution results in an invalid type or expression, the
-      //   constraint is not satisfied.
-      IsSatisfied = false;
-      return false;
-    }
-  }
-
-  if (!CheckConstraintExpression(E.get()))
+  if (SubstitutedAtomicExpr.isInvalid())
     return true;
 
+  if (!SubstitutedAtomicExpr.isUsable())
+    // Evaluator has decided satisfaction without yielding an expression.
+    return false;
+
+  EnterExpressionEvaluationContext ConstantEvaluated(
+      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
   SmallVector<PartialDiagnosticAt, 2> EvaluationDiags;
   Expr::EvalResult EvalResult;
   EvalResult.Diag = &EvaluationDiags;
-  if (!E.get()->EvaluateAsRValue(EvalResult, Context)) {
-    // C++2a [temp.constr.atomic]p1
-    //   ...E shall be a constant expression of type bool.
-    Diag(E.get()->getBeginLoc(),
-         diag::err_non_constant_constraint_expression)
-        << E.get()->getSourceRange();
+  if (!SubstitutedAtomicExpr.get()->EvaluateAsRValue(EvalResult, S.Context)) {
+      // C++2a [temp.constr.atomic]p1
+      //   ...E shall be a constant expression of type bool.
+    S.Diag(SubstitutedAtomicExpr.get()->getBeginLoc(),
+           diag::err_non_constant_constraint_expression)
+        << SubstitutedAtomicExpr.get()->getSourceRange();
     for (const PartialDiagnosticAt &PDiag : EvaluationDiags)
-      Diag(PDiag.first, PDiag.second);
+      S.Diag(PDiag.first, PDiag.second);
     return true;
   }
 
-  IsSatisfied = EvalResult.Val.getInt().getBoolValue();
+  Satisfaction.IsSatisfied = EvalResult.Val.getInt().getBoolValue();
+  if (!Satisfaction.IsSatisfied)
+    Satisfaction.Details.emplace_back(ConstraintExpr,
+                                      SubstitutedAtomicExpr.get());
+
+  return false;
+}
+
+template <typename TemplateDeclT>
+static bool calculateConstraintSatisfaction(
+    Sema &S, TemplateDeclT *Template, ArrayRef<TemplateArgument> TemplateArgs,
+    SourceLocation TemplateNameLoc, MultiLevelTemplateArgumentList &MLTAL,
+    const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) {
+  return calculateConstraintSatisfaction(
+      S, ConstraintExpr, Satisfaction, [&](const Expr *AtomicExpr) {
+        EnterExpressionEvaluationContext ConstantEvaluated(
+            S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+
+        // Atomic constraint - substitute arguments and check satisfaction.
+        ExprResult SubstitutedExpression;
+        {
+          TemplateDeductionInfo Info(TemplateNameLoc);
+          Sema::InstantiatingTemplate Inst(S, AtomicExpr->getBeginLoc(),
+              Sema::InstantiatingTemplate::ConstraintSubstitution{}, Template,
+              Info, AtomicExpr->getSourceRange());
+          if (Inst.isInvalid())
+            return ExprError();
+          // We do not want error diagnostics escaping here.
+          Sema::SFINAETrap Trap(S);
+          SubstitutedExpression = S.SubstExpr(const_cast<Expr *>(AtomicExpr),
+                                              MLTAL);
+          if (SubstitutedExpression.isInvalid() || Trap.hasErrorOccurred()) {
+            // C++2a [temp.constr.atomic]p1
+            //   ...If substitution results in an invalid type or expression, the
+            //   constraint is not satisfied.
+            if (!Trap.hasErrorOccurred())
+              // A non-SFINAE error has occured as a result of this
+              // substitution.
+              return ExprError();
+
+            PartialDiagnosticAt SubstDiag{SourceLocation(),
+                                          PartialDiagnostic::NullDiagnostic()};
+            Info.takeSFINAEDiagnostic(SubstDiag);
+            // FIXME: Concepts: This is an unfortunate consequence of there
+            //  being no serialization code for PartialDiagnostics and the fact
+            //  that serializing them would likely take a lot more storage than
+            //  just storing them as strings. We would still like, in the
+            //  future, to serialize the proper PartialDiagnostic as serializing
+            //  it as a string defeats the purpose of the diagnostic mechanism.
+            SmallString<128> DiagString;
+            DiagString = ": ";
+            SubstDiag.second.EmitToString(S.getDiagnostics(), DiagString);
+            unsigned MessageSize = DiagString.size();
+            char *Mem = new (S.Context) char[MessageSize];
+            memcpy(Mem, DiagString.c_str(), MessageSize);
+            Satisfaction.Details.emplace_back(
+                AtomicExpr,
+                new (S.Context) ConstraintSatisfaction::SubstitutionDiagnostic{
+                        SubstDiag.first, StringRef(Mem, MessageSize)});
+            Satisfaction.IsSatisfied = false;
+            return ExprEmpty();
+          }
+        }
+
+        if (!S.CheckConstraintExpression(SubstitutedExpression.get()))
+          return ExprError();
+
+        return SubstitutedExpression;
+      });
+}
+
+template<typename TemplateDeclT>
+static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
+                                        ArrayRef<const Expr *> ConstraintExprs,
+                                        ArrayRef<TemplateArgument> TemplateArgs,
+                                        SourceRange TemplateIDRange,
+                                        ConstraintSatisfaction &Satisfaction) {
+  if (ConstraintExprs.empty()) {
+    Satisfaction.IsSatisfied = true;
+    return false;
+  }
+
+  for (auto& Arg : TemplateArgs)
+    if (Arg.isInstantiationDependent()) {
+      // No need to check satisfaction for dependent constraint expressions.
+      Satisfaction.IsSatisfied = true;
+      return false;
+    }
+
+  Sema::InstantiatingTemplate Inst(S, TemplateIDRange.getBegin(),
+      Sema::InstantiatingTemplate::ConstraintsCheck{}, Template, TemplateArgs,
+      TemplateIDRange);
+  if (Inst.isInvalid())
+    return true;
+
+  MultiLevelTemplateArgumentList MLTAL;
+  MLTAL.addOuterTemplateArguments(TemplateArgs);
+
+  for (const Expr *ConstraintExpr : ConstraintExprs) {
+    if (calculateConstraintSatisfaction(S, Template, TemplateArgs,
+                                        TemplateIDRange.getBegin(), MLTAL,
+                                        ConstraintExpr, Satisfaction))
+      return true;
+    if (!Satisfaction.IsSatisfied)
+      // [temp.constr.op] p2
+      //   [...] To determine if a conjunction is satisfied, the satisfaction
+      //   of the first operand is checked. If that is not satisfied, the
+      //   conjunction is not satisfied. [...]
+      return false;
+  }
+  return false;
+}
+
+bool Sema::CheckConstraintSatisfaction(TemplateDecl *Template,
+                                       ArrayRef<const Expr *> ConstraintExprs,
+                                       ArrayRef<TemplateArgument> TemplateArgs,
+                                       SourceRange TemplateIDRange,
+                                       ConstraintSatisfaction &Satisfaction) {
+  return ::CheckConstraintSatisfaction(*this, Template, ConstraintExprs,
+                                       TemplateArgs, TemplateIDRange,
+                                       Satisfaction);
+}
 
+bool
+Sema::CheckConstraintSatisfaction(ClassTemplatePartialSpecializationDecl* Part,
+                                  ArrayRef<const Expr *> ConstraintExprs,
+                                  ArrayRef<TemplateArgument> TemplateArgs,
+                                  SourceRange TemplateIDRange,
+                                  ConstraintSatisfaction &Satisfaction) {
+  return ::CheckConstraintSatisfaction(*this, Part, ConstraintExprs,
+                                       TemplateArgs, TemplateIDRange,
+                                       Satisfaction);
+}
+
+bool
+Sema::CheckConstraintSatisfaction(VarTemplatePartialSpecializationDecl* Partial,
+                                  ArrayRef<const Expr *> ConstraintExprs,
+                                  ArrayRef<TemplateArgument> TemplateArgs,
+                                  SourceRange TemplateIDRange,
+                                  ConstraintSatisfaction &Satisfaction) {
+  return ::CheckConstraintSatisfaction(*this, Partial, ConstraintExprs,
+                                       TemplateArgs, TemplateIDRange,
+                                       Satisfaction);
+}
+
+bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr,
+                                       ConstraintSatisfaction &Satisfaction) {
+  return calculateConstraintSatisfaction(
+      *this, ConstraintExpr, Satisfaction,
+      [](const Expr *AtomicExpr) -> ExprResult {
+        return ExprResult(const_cast<Expr *>(AtomicExpr));
+      });
+}
+
+bool Sema::EnsureTemplateArgumentListConstraints(
+    TemplateDecl *TD, ArrayRef<TemplateArgument> TemplateArgs,
+    SourceRange TemplateIDRange) {
+  ConstraintSatisfaction Satisfaction;
+  llvm::SmallVector<const Expr *, 3> AssociatedConstraints;
+  TD->getAssociatedConstraints(AssociatedConstraints);
+  if (CheckConstraintSatisfaction(TD, AssociatedConstraints, TemplateArgs,
+                                  TemplateIDRange, Satisfaction))
+    return true;
+
+  if (!Satisfaction.IsSatisfied) {
+    SmallString<128> TemplateArgString;
+    TemplateArgString = " ";
+    TemplateArgString += getTemplateArgumentBindingsText(
+        TD->getTemplateParameters(), TemplateArgs.data(), TemplateArgs.size());
+
+    Diag(TemplateIDRange.getBegin(),
+         diag::err_template_arg_list_constraints_not_satisfied)
+        << (int)getTemplateNameKindForDiagnostics(TemplateName(TD)) << TD
+        << TemplateArgString << TemplateIDRange;
+    DiagnoseUnsatisfiedConstraint(Satisfaction);
+    return true;
+  }
   return false;
+}
+
+static void diagnoseWellFormedUnsatisfiedConstraintExpr(Sema &S,
+                                                        Expr *SubstExpr,
+                                                        bool First = true) {
+  SubstExpr = SubstExpr->IgnoreParenImpCasts();
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SubstExpr)) {
+    switch (BO->getOpcode()) {
+    // These two cases will in practice only be reached when using fold
+    // expressions with || and &&, since otherwise the || and && will have been
+    // broken down into atomic constraints during satisfaction checking.
+    case BO_LOr:
+      // Or evaluated to false - meaning both RHS and LHS evaluated to false.
+      diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getLHS(), First);
+      diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(),
+                                                  /*First=*/false);
+      return;
+    case BO_LAnd:
+      bool LHSSatisfied;
+      BO->getLHS()->EvaluateAsBooleanCondition(LHSSatisfied, S.Context);
+      if (LHSSatisfied) {
+        // LHS is true, so RHS must be false.
+        diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(), First);
+        return;
+      }
+      // LHS is false
+      diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getLHS(), First);
+
+      // RHS might also be false
+      bool RHSSatisfied;
+      BO->getRHS()->EvaluateAsBooleanCondition(RHSSatisfied, S.Context);
+      if (!RHSSatisfied)
+        diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(),
+                                                    /*First=*/false);
+      return;
+    case BO_GE:
+    case BO_LE:
+    case BO_GT:
+    case BO_LT:
+    case BO_EQ:
+    case BO_NE:
+      if (BO->getLHS()->getType()->isIntegerType() &&
+          BO->getRHS()->getType()->isIntegerType()) {
+        Expr::EvalResult SimplifiedLHS;
+        Expr::EvalResult SimplifiedRHS;
+        BO->getLHS()->EvaluateAsInt(SimplifiedLHS, S.Context);
+        BO->getRHS()->EvaluateAsInt(SimplifiedRHS, S.Context);
+        if (!SimplifiedLHS.Diag && ! SimplifiedRHS.Diag) {
+          S.Diag(SubstExpr->getBeginLoc(),
+                 diag::note_atomic_constraint_evaluated_to_false_elaborated)
+              << (int)First << SubstExpr
+              << SimplifiedLHS.Val.getInt().toString(10)
+              << BinaryOperator::getOpcodeStr(BO->getOpcode())
+              << SimplifiedRHS.Val.getInt().toString(10);
+          return;
+        }
+      }
+      break;
+
+    default:
+      break;
+    }
+  } else if (auto *CSE = dyn_cast<ConceptSpecializationExpr>(SubstExpr)) {
+    if (CSE->getTemplateArgsAsWritten()->NumTemplateArgs == 1) {
+      S.Diag(
+          CSE->getSourceRange().getBegin(),
+          diag::
+          note_single_arg_concept_specialization_constraint_evaluated_to_false)
+          << (int)First
+          << CSE->getTemplateArgsAsWritten()->arguments()[0].getArgument()
+          << CSE->getNamedConcept();
+    } else {
+      S.Diag(SubstExpr->getSourceRange().getBegin(),
+             diag::note_concept_specialization_constraint_evaluated_to_false)
+          << (int)First << CSE;
+    }
+    S.DiagnoseUnsatisfiedConstraint(CSE->getSatisfaction());
+    return;
+  }
+
+  S.Diag(SubstExpr->getSourceRange().getBegin(),
+         diag::note_atomic_constraint_evaluated_to_false)
+      << (int)First << SubstExpr;
+}
+
+template<typename SubstitutionDiagnostic>
+static void diagnoseUnsatisfiedConstraintExpr(
+    Sema &S, const Expr *E,
+    const llvm::PointerUnion<Expr *, SubstitutionDiagnostic *> &Record,
+    bool First = true) {
+  if (auto *Diag = Record.template dyn_cast<SubstitutionDiagnostic *>()){
+    S.Diag(Diag->first, diag::note_substituted_constraint_expr_is_ill_formed)
+        << Diag->second;
+    return;
+  }
+
+  diagnoseWellFormedUnsatisfiedConstraintExpr(S,
+      Record.template get<Expr *>(), First);
+}
+
+void Sema::DiagnoseUnsatisfiedConstraint(
+    const ConstraintSatisfaction& Satisfaction) {
+  assert(!Satisfaction.IsSatisfied &&
+         "Attempted to diagnose a satisfied constraint");
+  bool First = true;
+  for (auto &Pair : Satisfaction.Details) {
+    diagnoseUnsatisfiedConstraintExpr(*this, Pair.first, Pair.second, First);
+    First = false;
+  }
+}
+
+void Sema::DiagnoseUnsatisfiedConstraint(
+    const ASTConstraintSatisfaction &Satisfaction) {
+  assert(!Satisfaction.IsSatisfied &&
+         "Attempted to diagnose a satisfied constraint");
+  bool First = true;
+  for (auto &Pair : Satisfaction) {
+    diagnoseUnsatisfiedConstraintExpr(*this, Pair.first, Pair.second, First);
+    First = false;
+  }
 }
\ No newline at end of file
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -14486,8 +14486,16 @@
       std::string InnerCondDescription;
       std::tie(InnerCond, InnerCondDescription) =
         findFailedBooleanCondition(Converted.get());
-      if (InnerCond && !isa<CXXBoolLiteralExpr>(InnerCond)
-                    && !isa<IntegerLiteral>(InnerCond)) {
+      if (InnerCond && isa<ConceptSpecializationExpr>(InnerCond)) {
+        // Drill down into concept specialization expressions to see why they
+        // weren't satisfied.
+        Diag(StaticAssertLoc, diag::err_static_assert_failed)
+          << !AssertMessage << Msg.str() << AssertExpr->getSourceRange();
+        ConstraintSatisfaction Satisfaction;
+        if (!CheckConstraintSatisfaction(InnerCond, Satisfaction))
+          DiagnoseUnsatisfiedConstraint(Satisfaction);
+      } else if (InnerCond && !isa<CXXBoolLiteralExpr>(InnerCond)
+                           && !isa<IntegerLiteral>(InnerCond)) {
         Diag(StaticAssertLoc, diag::err_static_assert_requirement_failed)
           << InnerCondDescription << !AssertMessage
           << Msg.str() << InnerCond->getSourceRange();
diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp
--- a/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/clang/lib/Sema/SemaObjCProperty.cpp
@@ -1058,11 +1058,13 @@
                           SourceLocation PropertyLoc) {
   ObjCMethodDecl *Decl = AccessorDecl;
   ObjCMethodDecl *ImplDecl = ObjCMethodDecl::Create(
-      Context, AtLoc, PropertyLoc, Decl->getSelector(), Decl->getReturnType(),
+      Context, AtLoc.isValid() ? AtLoc : Decl->getBeginLoc(),
+      PropertyLoc.isValid() ? PropertyLoc : Decl->getEndLoc(),
+      Decl->getSelector(), Decl->getReturnType(),
       Decl->getReturnTypeSourceInfo(), Impl, Decl->isInstanceMethod(),
-      Decl->isVariadic(), Decl->isPropertyAccessor(), /* isSynthesized*/ true,
-      Decl->isImplicit(), Decl->isDefined(), Decl->getImplementationControl(),
-      Decl->hasRelatedResultType());
+      Decl->isVariadic(), Decl->isPropertyAccessor(),
+      /* isSynthesized*/ true, Decl->isImplicit(), Decl->isDefined(),
+      Decl->getImplementationControl(), Decl->hasRelatedResultType());
   ImplDecl->getMethodFamily();
   if (Decl->hasAttrs())
     ImplDecl->setAttrs(Decl->getAttrs());
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -591,6 +591,12 @@
     TemplateArgumentList *TemplateArgs;
     unsigned CallArgIndex;
   };
+  // Structure used by DeductionFailureInfo to store information about
+  // unsatisfied constraints.
+  struct CNSInfo {
+    TemplateArgumentList *TemplateArgs;
+    ConstraintSatisfaction Satisfaction;
+  };
 }
 
 /// Convert from Sema's representation of template deduction information
@@ -661,6 +667,14 @@
     }
     break;
 
+  case Sema::TDK_ConstraintsNotSatisfied: {
+    CNSInfo *Saved = new (Context) CNSInfo;
+    Saved->TemplateArgs = Info.take();
+    Saved->Satisfaction = Info.AssociatedConstraintsSatisfaction;
+    Result.Data = Saved;
+    break;
+  }
+
   case Sema::TDK_Success:
   case Sema::TDK_NonDependentConversionFailure:
     llvm_unreachable("not a deduction failure");
@@ -701,6 +715,15 @@
     }
     break;
 
+  case Sema::TDK_ConstraintsNotSatisfied:
+    // FIXME: Destroy the template argument list?
+    Data = nullptr;
+    if (PartialDiagnosticAt *Diag = getSFINAEDiagnostic()) {
+      Diag->~PartialDiagnosticAt();
+      HasDiagnostic = false;
+    }
+    break;
+
   // Unhandled
   case Sema::TDK_MiscellaneousDeductionFailure:
     break;
@@ -726,6 +749,7 @@
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_CUDATargetMismatch:
   case Sema::TDK_NonDependentConversionFailure:
+  case Sema::TDK_ConstraintsNotSatisfied:
     return TemplateParameter();
 
   case Sema::TDK_Incomplete:
@@ -769,6 +793,9 @@
   case Sema::TDK_SubstitutionFailure:
     return static_cast<TemplateArgumentList*>(Data);
 
+  case Sema::TDK_ConstraintsNotSatisfied:
+    return static_cast<CNSInfo*>(Data)->TemplateArgs;
+
   // Unhandled
   case Sema::TDK_MiscellaneousDeductionFailure:
     break;
@@ -789,6 +816,7 @@
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_CUDATargetMismatch:
   case Sema::TDK_NonDependentConversionFailure:
+  case Sema::TDK_ConstraintsNotSatisfied:
     return nullptr;
 
   case Sema::TDK_IncompletePack:
@@ -820,6 +848,7 @@
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_CUDATargetMismatch:
   case Sema::TDK_NonDependentConversionFailure:
+  case Sema::TDK_ConstraintsNotSatisfied:
     return nullptr;
 
   case Sema::TDK_Inconsistent:
@@ -1255,6 +1284,8 @@
     return NewTarget != OldTarget;
   }
 
+  // TODO: Concepts: Check function trailing requires clauses here.
+
   // The signatures match; this is not an overload.
   return false;
 }
@@ -10352,6 +10383,21 @@
     MaybeEmitInheritedConstructorNote(S, Found);
     return;
 
+  case Sema::TDK_ConstraintsNotSatisfied: {
+    // Format the template argument list into the argument string.
+    SmallString<128> TemplateArgString;
+    TemplateArgumentList *Args = DeductionFailure.getTemplateArgumentList();
+    TemplateArgString = " ";
+    TemplateArgString += S.getTemplateArgumentBindingsText(
+        getDescribedTemplate(Templated)->getTemplateParameters(), *Args);
+    S.Diag(Templated->getLocation(),
+           diag::note_ovl_candidate_unsatisfied_constraints)
+        << TemplateArgString;
+
+    S.DiagnoseUnsatisfiedConstraint(
+        static_cast<CNSInfo*>(DeductionFailure.Data)->Satisfaction);
+    return;
+  }
   case Sema::TDK_TooManyArguments:
   case Sema::TDK_TooFewArguments:
     DiagnoseArityMismatch(S, Found, Templated, NumArgs);
@@ -10804,6 +10850,7 @@
 
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_DeducedMismatch:
+  case Sema::TDK_ConstraintsNotSatisfied:
   case Sema::TDK_DeducedMismatchNested:
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_MiscellaneousDeductionFailure:
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -3213,8 +3213,7 @@
 
   TemplateDecl *Template = Name.getAsTemplateDecl();
   if (!Template || isa<FunctionTemplateDecl>(Template) ||
-      isa<VarTemplateDecl>(Template) ||
-      isa<ConceptDecl>(Template)) {
+      isa<VarTemplateDecl>(Template) || isa<ConceptDecl>(Template)) {
     // We might have a substituted template template parameter pack. If so,
     // build a template specialization type for it.
     if (Name.getAsSubstTemplateTemplateParmPack())
@@ -3230,7 +3229,8 @@
   // template.
   SmallVector<TemplateArgument, 4> Converted;
   if (CheckTemplateArgumentList(Template, TemplateLoc, TemplateArgs,
-                                false, Converted))
+                                false, Converted,
+                                /*UpdateArgsWithConversion=*/true))
     return QualType();
 
   QualType CanonType;
@@ -3238,6 +3238,7 @@
   bool InstantiationDependent = false;
   if (TypeAliasTemplateDecl *AliasTemplate =
           dyn_cast<TypeAliasTemplateDecl>(Template)) {
+
     // Find the canonical type for this type alias template specialization.
     TypeAliasDecl *Pattern = AliasTemplate->getTemplatedDecl();
     if (Pattern->isInvalidDecl())
@@ -3875,7 +3876,8 @@
   // template.
   SmallVector<TemplateArgument, 4> Converted;
   if (CheckTemplateArgumentList(VarTemplate, TemplateNameLoc, TemplateArgs,
-                                false, Converted))
+                                false, Converted,
+                                /*UpdateArgsWithConversion=*/true))
     return true;
 
   // Find the variable template (partial) specialization declaration that
@@ -4046,7 +4048,7 @@
   if (CheckTemplateArgumentList(
           Template, TemplateNameLoc,
           const_cast<TemplateArgumentListInfo &>(TemplateArgs), false,
-          Converted))
+          Converted, /*UpdateArgsWithConversion=*/true))
     return true;
 
   // Find the variable template specialization declaration that
@@ -4237,7 +4239,7 @@
                                 /*UpdateArgsWithConversion=*/false))
     return ExprError();
 
-  Optional<bool> IsSatisfied;
+  ConstraintSatisfaction Satisfaction;
   bool AreArgsDependent = false;
   for (TemplateArgument &Arg : Converted) {
     if (Arg.isDependent()) {
@@ -4245,25 +4247,21 @@
       break;
     }
   }
-  if (!AreArgsDependent) {
-    InstantiatingTemplate Inst(*this, ConceptNameLoc,
-        InstantiatingTemplate::ConstraintsCheck{}, NamedConcept, Converted,
-        SourceRange(SS.isSet() ? SS.getBeginLoc() : ConceptNameLoc,
-                    TemplateArgs->getRAngleLoc()));
-    MultiLevelTemplateArgumentList MLTAL;
-    MLTAL.addOuterTemplateArguments(Converted);
-    bool Satisfied;
-    if (CalculateConstraintSatisfaction(NamedConcept, MLTAL,
-                                        NamedConcept->getConstraintExpr(),
-                                        Satisfied))
+  if (!AreArgsDependent &&
+      CheckConstraintSatisfaction(NamedConcept,
+                                  {NamedConcept->getConstraintExpr()},
+                                  Converted,
+                                  SourceRange(SS.isSet() ? SS.getBeginLoc() :
+                                                           ConceptNameLoc,
+                                              TemplateArgs->getRAngleLoc()),
+                                  Satisfaction))
       return ExprError();
-    IsSatisfied = Satisfied;
-  }
+
   return ConceptSpecializationExpr::Create(Context,
       SS.isSet() ? SS.getWithLocInContext(Context) : NestedNameSpecifierLoc{},
       TemplateKWLoc, ConceptNameLoc, FoundDecl, NamedConcept,
       ASTTemplateArgumentListInfo::Create(Context, *TemplateArgs), Converted,
-      IsSatisfied);
+      AreArgsDependent ? nullptr : &Satisfaction);
 }
 
 ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS,
@@ -5209,7 +5207,11 @@
     TemplateDecl *Template, SourceLocation TemplateLoc,
     TemplateArgumentListInfo &TemplateArgs, bool PartialTemplateArgs,
     SmallVectorImpl<TemplateArgument> &Converted,
-    bool UpdateArgsWithConversions) {
+    bool UpdateArgsWithConversions, bool *ConstraintsNotSatisfied) {
+
+  if (ConstraintsNotSatisfied)
+    *ConstraintsNotSatisfied = false;
+
   // Make a copy of the template arguments for processing.  Only make the
   // changes at the end when successful in matching the arguments to the
   // template.
@@ -5324,7 +5326,6 @@
       if ((*Param)->isTemplateParameterPack() && !ArgumentPack.empty())
         Converted.push_back(
             TemplateArgument::CreatePackCopy(Context, ArgumentPack));
-
       return false;
     }
 
@@ -5463,6 +5464,15 @@
   if (UpdateArgsWithConversions)
     TemplateArgs = std::move(NewArgs);
 
+  if (!PartialTemplateArgs &&
+      EnsureTemplateArgumentListConstraints(
+        Template, Converted, SourceRange(TemplateLoc,
+                                         TemplateArgs.getRAngleLoc()))) {
+    if (ConstraintsNotSatisfied)
+      *ConstraintsNotSatisfied = true;
+    return true;
+  }
+
   return false;
 }
 
@@ -6968,100 +6978,73 @@
 
   ValueDecl *VD = Arg.getAsDecl();
 
-  if (VD->getDeclContext()->isRecord() &&
-      (isa<CXXMethodDecl>(VD) || isa<FieldDecl>(VD) ||
-       isa<IndirectFieldDecl>(VD))) {
-    // If the value is a class member, we might have a pointer-to-member.
-    // Determine whether the non-type template template parameter is of
-    // pointer-to-member type. If so, we need to build an appropriate
-    // expression for a pointer-to-member, since a "normal" DeclRefExpr
-    // would refer to the member itself.
-    if (ParamType->isMemberPointerType()) {
-      QualType ClassType
-        = Context.getTypeDeclType(cast<RecordDecl>(VD->getDeclContext()));
-      NestedNameSpecifier *Qualifier
-        = NestedNameSpecifier::Create(Context, nullptr, false,
-                                      ClassType.getTypePtr());
-      CXXScopeSpec SS;
-      SS.MakeTrivial(Context, Qualifier, Loc);
-
-      // The actual value-ness of this is unimportant, but for
-      // internal consistency's sake, references to instance methods
-      // are r-values.
-      ExprValueKind VK = VK_LValue;
-      if (isa<CXXMethodDecl>(VD) && cast<CXXMethodDecl>(VD)->isInstance())
-        VK = VK_RValue;
-
-      ExprResult RefExpr = BuildDeclRefExpr(VD,
-                                            VD->getType().getNonReferenceType(),
-                                            VK,
-                                            Loc,
-                                            &SS);
-      if (RefExpr.isInvalid())
-        return ExprError();
-
-      RefExpr = CreateBuiltinUnaryOp(Loc, UO_AddrOf, RefExpr.get());
-
-      // We might need to perform a trailing qualification conversion, since
-      // the element type on the parameter could be more qualified than the
-      // element type in the expression we constructed, and likewise for a
-      // function conversion.
-      bool ObjCLifetimeConversion;
-      QualType Ignored;
-      if (IsFunctionConversion(RefExpr.get()->getType(), ParamType, Ignored) ||
-          IsQualificationConversion(RefExpr.get()->getType(),
-                                    ParamType.getUnqualifiedType(), false,
-                                    ObjCLifetimeConversion))
-        RefExpr = ImpCastExprToType(RefExpr.get(),
-                                    ParamType.getUnqualifiedType(), CK_NoOp);
-
-      // FIXME: We need to perform derived-to-base or base-to-derived
-      // pointer-to-member conversions here too.
-      assert(!RefExpr.isInvalid() &&
-             Context.hasSameType(RefExpr.get()->getType(),
-                                 ParamType.getUnqualifiedType()));
-      return RefExpr;
-    }
-  }
-
-  QualType T = VD->getType().getNonReferenceType();
+  CXXScopeSpec SS;
+  if (ParamType->isMemberPointerType()) {
+    // If this is a pointer to member, we need to use a qualified name to
+    // form a suitable pointer-to-member constant.
+    assert(VD->getDeclContext()->isRecord() &&
+           (isa<CXXMethodDecl>(VD) || isa<FieldDecl>(VD) ||
+            isa<IndirectFieldDecl>(VD)));
+    QualType ClassType
+      = Context.getTypeDeclType(cast<RecordDecl>(VD->getDeclContext()));
+    NestedNameSpecifier *Qualifier
+      = NestedNameSpecifier::Create(Context, nullptr, false,
+                                    ClassType.getTypePtr());
+    SS.MakeTrivial(Context, Qualifier, Loc);
+  }
+
+  ExprResult RefExpr = BuildDeclarationNameExpr(
+      SS, DeclarationNameInfo(VD->getDeclName(), Loc), VD);
+  if (RefExpr.isInvalid())
+    return ExprError();
 
-  if (ParamType->isPointerType()) {
-    // When the non-type template parameter is a pointer, take the
-    // address of the declaration.
-    ExprResult RefExpr = BuildDeclRefExpr(VD, T, VK_LValue, Loc);
+  // For a pointer, the argument declaration is the pointee. Take its address.
+  QualType ElemT(RefExpr.get()->getType()->getArrayElementTypeNoTypeQual(), 0);
+  if (ParamType->isPointerType() && !ElemT.isNull() &&
+      Context.hasSimilarType(ElemT, ParamType->getPointeeType())) {
+    // Decay an array argument if we want a pointer to its first element.
+    RefExpr = DefaultFunctionArrayConversion(RefExpr.get());
     if (RefExpr.isInvalid())
       return ExprError();
-
-    if (!Context.hasSameUnqualifiedType(ParamType->getPointeeType(), T) &&
-        (T->isFunctionType() || T->isArrayType())) {
-      // Decay functions and arrays unless we're forming a pointer to array.
-      RefExpr = DefaultFunctionArrayConversion(RefExpr.get());
-      if (RefExpr.isInvalid())
-        return ExprError();
-
-      return RefExpr;
+  } else if (ParamType->isPointerType() || ParamType->isMemberPointerType()) {
+    // For any other pointer, take the address (or form a pointer-to-member).
+    RefExpr = CreateBuiltinUnaryOp(Loc, UO_AddrOf, RefExpr.get());
+    if (RefExpr.isInvalid())
+      return ExprError();
+  } else {
+    assert(ParamType->isReferenceType() &&
+           "unexpected type for decl template argument");
+  }
+
+  // At this point we should have the right value category.
+  assert(ParamType->isReferenceType() == RefExpr.get()->isLValue() &&
+         "value kind mismatch for non-type template argument");
+
+  // The type of the template parameter can differ from the type of the
+  // argument in various ways; convert it now if necessary.
+  QualType DestExprType = ParamType.getNonLValueExprType(Context);
+  if (!Context.hasSameType(RefExpr.get()->getType(), DestExprType)) {
+    CastKind CK;
+    QualType Ignored;
+    if (Context.hasSimilarType(RefExpr.get()->getType(), DestExprType) ||
+        IsFunctionConversion(RefExpr.get()->getType(), DestExprType, Ignored)) {
+      CK = CK_NoOp;
+    } else if (ParamType->isVoidPointerType() &&
+               RefExpr.get()->getType()->isPointerType()) {
+      CK = CK_BitCast;
+    } else {
+      // FIXME: Pointers to members can need conversion derived-to-base or
+      // base-to-derived conversions. We currently don't retain enough
+      // information to convert properly (we need to track a cast path or
+      // subobject number in the template argument).
+      llvm_unreachable(
+          "unexpected conversion required for non-type template argument");
     }
-
-    // Take the address of everything else
-    return CreateBuiltinUnaryOp(Loc, UO_AddrOf, RefExpr.get());
-  }
-
-  ExprValueKind VK = VK_RValue;
-
-  // If the non-type template parameter has reference type, qualify the
-  // resulting declaration reference with the extra qualifiers on the
-  // type that the reference refers to.
-  if (const ReferenceType *TargetRef = ParamType->getAs<ReferenceType>()) {
-    VK = VK_LValue;
-    T = Context.getQualifiedType(T,
-                              TargetRef->getPointeeType().getQualifiers());
-  } else if (isa<FunctionDecl>(VD)) {
-    // References to functions are always lvalues.
-    VK = VK_LValue;
+    RefExpr = ImpCastExprToType(RefExpr.get(), DestExprType, CK,
+                                RefExpr.get()->getValueKind());
   }
 
-  return BuildDeclRefExpr(VD, T, VK, Loc);
+  return RefExpr;
 }
 
 /// Construct a new expression that refers to the given
@@ -7803,7 +7786,8 @@
   // template.
   SmallVector<TemplateArgument, 4> Converted;
   if (CheckTemplateArgumentList(ClassTemplate, TemplateNameLoc,
-                                TemplateArgs, false, Converted))
+                                TemplateArgs, false, Converted,
+                                /*UpdateArgsWithConversion=*/true))
     return true;
 
   // Find the class template (partial) specialization declaration that
@@ -9049,7 +9033,8 @@
   // template.
   SmallVector<TemplateArgument, 4> Converted;
   if (CheckTemplateArgumentList(ClassTemplate, TemplateNameLoc,
-                                TemplateArgs, false, Converted))
+                                TemplateArgs, false, Converted,
+                                /*UpdateArgsWithConversion=*/true))
     return true;
 
   // Find the class template specialization declaration that
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2591,6 +2591,23 @@
   return ConvertArg(Arg, 0);
 }
 
+template<typename TemplateDeclT>
+static Sema::TemplateDeductionResult
+CheckDeducedArgumentConstraints(Sema& S, TemplateDeclT *Template,
+                                ArrayRef<TemplateArgument> DeducedArgs,
+                                TemplateDeductionInfo &Info) {
+  llvm::SmallVector<const Expr *, 3> AssociatedConstraints;
+  Template->getAssociatedConstraints(AssociatedConstraints);
+  if (S.CheckConstraintSatisfaction(Template, AssociatedConstraints,
+                                    DeducedArgs, Info.getLocation(),
+                                    Info.AssociatedConstraintsSatisfaction) ||
+      !Info.AssociatedConstraintsSatisfaction.IsSatisfied) {
+    Info.reset(TemplateArgumentList::CreateCopy(S.Context, DeducedArgs));
+    return Sema::TDK_ConstraintsNotSatisfied;
+  }
+  return Sema::TDK_Success;
+}
+
 // FIXME: This should not be a template, but
 // ClassTemplatePartialSpecializationDecl sadly does not derive from
 // TemplateDecl.
@@ -2688,6 +2705,10 @@
     // If we get here, we successfully used the default template argument.
   }
 
+  if (Sema::TemplateDeductionResult Result
+        = CheckDeducedArgumentConstraints(S, Template, Builder, Info))
+    return Result;
+
   return Sema::TDK_Success;
 }
 
@@ -2767,10 +2788,14 @@
     return Sema::TDK_SubstitutionFailure;
   }
 
+  bool ConstraintsNotSatisfied;
   SmallVector<TemplateArgument, 4> ConvertedInstArgs;
   if (S.CheckTemplateArgumentList(Template, Partial->getLocation(), InstArgs,
-                                  false, ConvertedInstArgs))
-    return Sema::TDK_SubstitutionFailure;
+                                  false, ConvertedInstArgs,
+                                  /*UpdateArgsWithConversions=*/true,
+                                  &ConstraintsNotSatisfied))
+    return ConstraintsNotSatisfied ? Sema::TDK_ConstraintsNotSatisfied :
+                                     Sema::TDK_SubstitutionFailure;
 
   TemplateParameterList *TemplateParams = Template->getTemplateParameters();
   for (unsigned I = 0, E = TemplateParams->size(); I != E; ++I) {
@@ -2831,7 +2856,6 @@
   return Sema::TDK_Success;
 }
 
-
 /// Perform template argument deduction to determine whether
 /// the given template arguments match the given class template
 /// partial specialization per C++ [temp.class.spec.match].
@@ -5049,6 +5073,7 @@
 static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
                                      TemplateLikeDecl *P2,
                                      TemplateDeductionInfo &Info) {
+  // TODO: Concepts: Regard constraints
   // C++ [temp.class.order]p1:
   //   For two class template partial specializations, the first is at least as
   //   specialized as the second if, given the following rewrite to two
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -363,7 +363,7 @@
 
 Sema::InstantiatingTemplate::InstantiatingTemplate(
     Sema &SemaRef, SourceLocation PointOfInstantiation,
-    ConstraintsCheck, TemplateDecl *Template,
+    ConstraintsCheck, NamedDecl *Template,
     ArrayRef<TemplateArgument> TemplateArgs, SourceRange InstantiationRange)
     : InstantiatingTemplate(
           SemaRef, CodeSynthesisContext::ConstraintsCheck,
@@ -372,7 +372,7 @@
 
 Sema::InstantiatingTemplate::InstantiatingTemplate(
     Sema &SemaRef, SourceLocation PointOfInstantiation,
-    ConstraintSubstitution, TemplateDecl *Template,
+    ConstraintSubstitution, NamedDecl *Template,
     sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
     : InstantiatingTemplate(
           SemaRef, CodeSynthesisContext::ConstraintSubstitution,
@@ -691,24 +691,27 @@
     case CodeSynthesisContext::Memoization:
       break;
     
-    case CodeSynthesisContext::ConstraintsCheck:
-      if (auto *CD = dyn_cast<ConceptDecl>(Active->Entity)) {
-        SmallVector<char, 128> TemplateArgsStr;
-        llvm::raw_svector_ostream OS(TemplateArgsStr);
-        CD->printName(OS);
-        printTemplateArgumentList(OS, Active->template_arguments(),
-                                  getPrintingPolicy());
-        Diags.Report(Active->PointOfInstantiation,
-                     diag::note_concept_specialization_here)
-          << OS.str()
-          << Active->InstantiationRange;
-        break;
+    case CodeSynthesisContext::ConstraintsCheck: {
+      unsigned DiagID = 0;
+      if (isa<ConceptDecl>(Active->Entity))
+        DiagID = diag::note_concept_specialization_here;
+      else if (isa<TemplateDecl>(Active->Entity))
+        DiagID = diag::note_checking_constraints_for_template_id_here;
+      else if (isa<VarTemplatePartialSpecializationDecl>(Active->Entity))
+        DiagID = diag::note_checking_constraints_for_var_spec_id_here;
+      else {
+        assert(isa<ClassTemplatePartialSpecializationDecl>(Active->Entity));
+        DiagID = diag::note_checking_constraints_for_class_spec_id_here;
       }
-      // TODO: Concepts - implement this for constrained templates and partial
-      // specializations.
-      llvm_unreachable("only concept constraints are supported right now");
+      SmallVector<char, 128> TemplateArgsStr;
+      llvm::raw_svector_ostream OS(TemplateArgsStr);
+      cast<NamedDecl>(Active->Entity)->printName(OS);
+      printTemplateArgumentList(OS, Active->template_arguments(),
+                                getPrintingPolicy());
+      Diags.Report(Active->PointOfInstantiation, DiagID) << OS.str()
+        << Active->InstantiationRange;
       break;
-      
+    }
     case CodeSynthesisContext::ConstraintSubstitution:
       Diags.Report(Active->PointOfInstantiation,
                    diag::note_constraint_substitution_here)
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3296,7 +3296,8 @@
                                         D->getLocation(),
                                         InstTemplateArgs,
                                         false,
-                                        Converted))
+                                        Converted,
+                                        /*UpdateArgsWithConversion=*/true))
     return nullptr;
 
   // Figure out where to insert this class template explicit specialization
@@ -3417,7 +3418,8 @@
   // Check that the template argument list is well-formed for this template.
   SmallVector<TemplateArgument, 4> Converted;
   if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(),
-                                        VarTemplateArgsInfo, false, Converted))
+                                        VarTemplateArgsInfo, false, Converted,
+                                        /*UpdateArgsWithConversion=*/true))
     return nullptr;
 
   // Check whether we've already seen a declaration of this specialization.
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Serialization/ASTReader.h"
+#include "clang/AST/ASTConcept.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/AttrIterator.h"
 #include "clang/AST/Decl.h"
@@ -742,14 +743,33 @@
   E->TemplateKWLoc = Record.readSourceLocation();
   E->ConceptNameLoc = Record.readSourceLocation();
   E->FoundDecl = ReadDeclAs<NamedDecl>();
-  E->NamedConcept.setPointer(ReadDeclAs<ConceptDecl>());
+  E->NamedConcept = ReadDeclAs<ConceptDecl>();
   const ASTTemplateArgumentListInfo *ArgsAsWritten =
       Record.readASTTemplateArgumentListInfo();
   llvm::SmallVector<TemplateArgument, 4> Args;
   for (unsigned I = 0; I < NumTemplateArgs; ++I)
     Args.push_back(Record.readTemplateArgument());
   E->setTemplateArguments(ArgsAsWritten, Args);
-  E->NamedConcept.setInt(Record.readInt() == 1);
+  ConstraintSatisfaction Satisfaction;
+  Satisfaction.IsSatisfied = Record.readInt();
+  if (!Satisfaction.IsSatisfied) {
+    unsigned NumDetailRecords = Record.readInt();
+    for (unsigned i = 0; i != NumDetailRecords; ++i) {
+      Expr *ConstraintExpr = Record.readExpr();
+      bool IsDiagnostic = Record.readInt();
+      if (IsDiagnostic) {
+        SourceLocation DiagLocation = Record.readSourceLocation();
+        std::string DiagMessage = Record.readString();
+        Satisfaction.Details.emplace_back(
+            ConstraintExpr, new (Record.getContext())
+                                ConstraintSatisfaction::SubstitutionDiagnostic{
+                                    DiagLocation, DiagMessage});
+      } else
+        Satisfaction.Details.emplace_back(ConstraintExpr, Record.readExpr());
+    }
+  }
+  E->Satisfaction = ASTConstraintSatisfaction::Create(Record.getContext(),
+                                                      Satisfaction);
 }
 
 void ASTStmtReader::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -401,7 +401,25 @@
   Record.AddASTTemplateArgumentListInfo(E->getTemplateArgsAsWritten());
   for (const TemplateArgument &Arg : TemplateArgs)
     Record.AddTemplateArgument(Arg);
-  Record.push_back(E->isSatisfied());
+  const ASTConstraintSatisfaction &Satisfaction = E->getSatisfaction();
+  Record.push_back(Satisfaction.IsSatisfied);
+  if (!Satisfaction.IsSatisfied) {
+    Record.push_back(Satisfaction.NumRecords);
+    for (const auto &DetailRecord : Satisfaction) {
+      Record.AddStmt(const_cast<Expr *>(DetailRecord.first));
+      auto *E = DetailRecord.second.dyn_cast<Expr *>();
+      Record.push_back(E == nullptr);
+      if (E)
+        Record.AddStmt(E);
+      else {
+        auto *Diag = DetailRecord.second.get<std::pair<SourceLocation,
+                                                       StringRef> *>();
+        Record.AddSourceLocation(Diag->first);
+        Record.AddString(Diag->second);
+      }
+    }
+  }
+
   Code = serialization::EXPR_CONCEPT_SPECIALIZATION;
 }
 
diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.id/p3.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.id/p3.cpp
--- a/clang/test/CXX/expr/expr.prim/expr.prim.id/p3.cpp
+++ b/clang/test/CXX/expr/expr.prim/expr.prim.id/p3.cpp
@@ -72,6 +72,15 @@
 static_assert(IsTypePredicate<T2>);
 static_assert(!IsTypePredicate<T1>);
 
+template<typename T, typename U, typename... Ts>
+concept OneOf = (Same<T, Ts> || ...);
+
+template<typename... X>
+constexpr bool S = OneOf<X..., int, int>;
+
+static_assert(S<int, long, int>);
+static_assert(!S<long, int, char, char>);
+
 namespace piecewise_substitution {
   template <typename T>
   concept True = true;
@@ -147,3 +156,25 @@
 struct X { static constexpr bool a = SameSize<T>; };
 
 static_assert(X<unsigned>::a);
+
+// static_assert concept diagnostics
+template<typename T>
+concept Large = sizeof(T) > 100;
+// expected-note@-1 2{{because 'sizeof(small) > 100' (1 > 100) evaluated to false}}
+
+struct small { };
+static_assert(Large<small>);
+// expected-error@-1 {{static_assert failed}}
+// expected-note@-2 {{because 'small' does not satisfy 'Large'}}
+static_assert(Large<small>, "small isn't large");
+// expected-error@-1 {{static_assert failed "small isn't large"}}
+// expected-note@-2 {{because 'small' does not satisfy 'Large'}}
+
+// Make sure access-checking can fail a concept specialization
+
+class T4 { static constexpr bool f = true; };
+template<typename T> concept AccessPrivate = T{}.f;
+// expected-note@-1{{because substituted constraint expression is ill-formed: 'f' is a private member of 'T4'}}
+static_assert(AccessPrivate<T4>);
+// expected-error@-1{{static_assert failed}}
+// expected-note@-2{{because 'T4' does not satisfy 'AccessPrivate'}}
diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s
+
+template<typename T>
+constexpr bool is_ptr_v = false;
+
+template<typename T>
+constexpr bool is_ptr_v<T*> = true;
+
+template<typename T, typename U>
+constexpr bool is_same_v = false;
+
+template<typename T>
+constexpr bool is_same_v<T, T> = true;
+
+template<typename T> requires is_ptr_v<T> // expected-note   {{because 'is_ptr_v<int>' evaluated to false}}
+                         // expected-note@-1{{because 'is_ptr_v<char>' evaluated to false}}
+auto dereference(T t) { // expected-note   {{candidate template ignored: constraints not satisfied [with T = int]}}
+                        // expected-note@-1{{candidate template ignored: constraints not satisfied [with T = char]}}
+  return *t;
+}
+
+static_assert(is_same_v<decltype(dereference<int*>(nullptr)), int>);
+static_assert(is_same_v<decltype(dereference(2)), int>); // expected-error {{no matching function for call to 'dereference'}}
+static_assert(is_same_v<decltype(dereference<char>('a')), char>); // expected-error {{no matching function for call to 'dereference'}}
+
+
+template<typename T> requires T{} + T{} // expected-note {{because substituted constraint expression is ill-formed: invalid operands to binary expression ('A' and 'A')}}
+auto foo(T t) { // expected-note {{candidate template ignored: constraints not satisfied [with T = A]}}
+  return t + t;
+}
+
+
+template<typename T> requires !((T{} - T{}) && (T{} + T{})) || false
+// expected-note@-1{{because substituted constraint expression is ill-formed: invalid operands to binary expression ('A' and 'A')}}
+// expected-note@-2{{and 'false' evaluated to false}}
+auto bar(T t) { // expected-note {{candidate template ignored: constraints not satisfied [with T = A]}}
+  return t + t;
+}
+
+struct A { };
+
+static_assert(foo(A{})); // expected-error {{no matching function for call to 'foo'}}
+static_assert(bar(A{})); // expected-error {{no matching function for call to 'bar'}}
\ No newline at end of file
diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s
+
+template<typename T> requires sizeof(T) >= 2 // expected-note{{because 'sizeof(char) >= 2' (1 >= 2) evaluated to false}}
+struct A {
+  static constexpr int value = sizeof(T);
+};
+
+static_assert(A<int>::value == 4);
+static_assert(A<char>::value == 1); // expected-error{{constraints not satisfied for class template 'A' [with T = char]}}
+
+template<typename T, typename U>
+  requires sizeof(T) != sizeof(U) // expected-note{{because 'sizeof(int) != sizeof(char [4])' (4 != 4) evaluated to false}}
+           && sizeof(T) >= 4 // expected-note{{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}}
+constexpr int SizeDiff = sizeof(T) > sizeof(U) ? sizeof(T) - sizeof(U) : sizeof(U) - sizeof(T);
+
+static_assert(SizeDiff<int, char> == 3);
+static_assert(SizeDiff<int, char[4]> == 0); // expected-error{{constraints not satisfied for variable template 'SizeDiff' [with T = int, U = char [4]]}}
+static_assert(SizeDiff<char, int> == 3); // expected-error{{constraints not satisfied for variable template 'SizeDiff' [with T = char, U = int]}}
+
+template<typename... Ts>
+  requires ((sizeof(Ts) == 4) || ...) // expected-note{{because 'sizeof(char) == 4' (1 == 4) evaluated to false}} expected-note{{'sizeof(long long) == 4' (8 == 4) evaluated to false}} expected-note{{'sizeof(int [20]) == 4' (80 == 4) evaluated to false}}
+constexpr auto SumSizes = (sizeof(Ts) + ...);
+
+static_assert(SumSizes<char, long long, int> == 13);
+static_assert(SumSizes<char, long long, int[20]> == 89); // expected-error{{constraints not satisfied for variable template 'SumSizes' [with Ts = <char, long long, int [20]>]}}
+
+template<typename T>
+concept IsBig = sizeof(T) > 100; // expected-note{{because 'sizeof(int) > 100' (4 > 100) evaluated to false}}
+
+template<typename T>
+  requires IsBig<T> // expected-note{{'int' does not satisfy 'IsBig'}}
+using BigPtr = T*;
+
+static_assert(sizeof(BigPtr<int>)); // expected-error{{constraints not satisfied for alias template 'BigPtr' [with T = int]}}}}
+
+template<typename T> requires T::value // expected-note{{because substituted constraint expression is ill-formed: type 'int' cannot be used prior to '::' because it has no members}}
+struct S { static constexpr bool value = true; };
+
+struct S2 { static constexpr bool value = true; };
+
+static_assert(S<int>::value); // expected-error{{constraints not satisfied for class template 'S' [with T = int]}}
+static_assert(S<S2>::value);
+
+template<typename T>
+struct AA
+{
+    template<typename U> requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}}
+    struct B
+    {
+        static constexpr int a = 0;
+    };
+
+    template<typename U> requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}}
+    static constexpr int b = 1;
+
+    template<typename U> requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}}
+    static constexpr int getB() { // expected-note{{candidate template ignored: constraints not satisfied [with U = int [2]]}}
+        return 2;
+    }
+
+    static auto foo()
+    {
+        return B<T[2]>::a; // expected-error{{constraints not satisfied for class template 'B' [with U = int [2]]}}
+    }
+
+    static auto foo1()
+    {
+        return b<T[2]>; // expected-error{{constraints not satisfied for variable template 'b' [with U = int [2]]}}
+    }
+
+    static auto foo2()
+    {
+        return AA<T>::getB<T[2]>(); // expected-error{{no matching function for call to 'getB'}}
+    }
+};
+
+constexpr auto x = AA<int>::foo(); // expected-note{{in instantiation of member function 'AA<int>::foo' requested here}}
+constexpr auto x1 = AA<int>::foo1(); // expected-note{{in instantiation of member function 'AA<int>::foo1' requested here}}
+constexpr auto x2 = AA<int>::foo2(); // expected-note{{in instantiation of member function 'AA<int>::foo2' requested here}}
+
+template<typename T>
+struct B { using type = typename T::type; }; // expected-error{{type 'int' cannot be used prior to '::' because it has no members}}
+
+template<typename T> requires B<T>::type // expected-note{{in instantiation of template class 'B<int>' requested here}}
+                                         // expected-note@-1{{while substituting template arguments into constraint expression here}}
+struct C { };
+
+template<typename T> requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}}
+struct D { };
+
+static_assert(C<int>{}); // expected-note{{while checking constraint satisfaction for template 'C<int>' required here}}
+static_assert(D<int>{}); // expected-note{{while checking constraint satisfaction for template 'D<int>' required here}}
\ No newline at end of file
diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s
+
+namespace class_templates
+{
+  template<typename T, typename U> requires sizeof(T) >= 4 // expected-note {{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}}
+  struct is_same { static constexpr bool value = false; };
+
+  template<typename T> requires sizeof(T*) >= 4 && sizeof(T) >= 4
+  struct is_same<T*, T*> { static constexpr bool value = true; };
+
+  static_assert(!is_same<char*, char*>::value);
+  static_assert(!is_same<short*, short*>::value);
+  static_assert(is_same<int*, int*>::value);
+  static_assert(is_same<char, char>::value); // expected-error {{constraints not satisfied for class template 'is_same' [with T = char, U = char]}}
+
+  template<typename T>
+  struct A { using type = typename T::type; }; // expected-error{{type 'int *' cannot be used prior to '::' because it has no members}}
+
+  template<typename T>
+  struct B {};
+
+  template<typename T> requires A<T>::type // expected-note{{in instantiation of template class 'class_templates::A<int *>' requested here}}
+                                           // expected-note@-1{{while substituting template arguments into constraint expression here}}
+  struct B<T*> {};
+
+  template<typename T> requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}}
+  struct B<T**> {};
+
+  static_assert((B<int**>{}, true)); // expected-note{{while checking constraint satisfaction for class template partial specialization 'B<int *>' required here}}
+  // expected-note@-1{{while checking constraint satisfaction for class template partial specialization 'B<int>' required here}}
+  // expected-note@-2{{during template argument deduction for class template partial specialization 'B<T *>' [with T = int *]}}
+  // expected-note@-3{{during template argument deduction for class template partial specialization 'B<T **>' [with T = int]}}
+  // expected-note@-4 2{{in instantiation of template class 'class_templates::B<int **>' requested here}}
+}
+
+namespace variable_templates
+{
+  template<typename T, typename U> requires sizeof(T) >= 4
+  constexpr bool is_same_v = false;
+
+  template<typename T> requires sizeof(T*) >= 4 && sizeof(T) >= 4
+  constexpr bool is_same_v<T*, T*> = true;
+
+  static_assert(!is_same_v<char*, char*>);
+  static_assert(!is_same_v<short*, short*>);
+  static_assert(is_same_v<int*, int*>);
+
+  template<typename T>
+  struct A { using type = typename T::type; }; // expected-error{{type 'int *' cannot be used prior to '::' because it has no members}}
+
+  template<typename T>
+  constexpr bool v1 = false;
+
+  template<typename T> requires A<T>::type // expected-note{{in instantiation of template class 'variable_templates::A<int *>' requested here}}
+                                           // expected-note@-1{{while substituting template arguments into constraint expression here}}
+  constexpr bool v1<T*> = true;
+
+  template<typename T> requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}}
+  constexpr bool v1<T**> = true;
+
+  static_assert(v1<int**>); // expected-note{{while checking constraint satisfaction for variable template partial specialization 'v1<int *>' required here}}
+  // expected-note@-1{{while checking constraint satisfaction for variable template partial specialization 'v1<int>' required here}}
+  // expected-note@-2{{during template argument deduction for variable template partial specialization 'v1<T *>' [with T = int *]}}
+  // expected-note@-3{{during template argument deduction for variable template partial specialization 'v1<T **>' [with T = int]}}
+  // expected-error@-4{{static_assert failed due to requirement 'v1<int **>'}}
+
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenObjCXX/synthesized-property-cleanup.mm b/clang/test/CodeGenObjCXX/synthesized-property-cleanup.mm
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenObjCXX/synthesized-property-cleanup.mm
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple arm64e-apple-ios13.0 -debug-info-kind=standalone -fobjc-arc \
+// RUN:   %s -emit-llvm -o - | FileCheck %s
+
+@interface NSObject
++ (id)alloc;
+@end
+
+@interface NSString : NSObject
+@end
+
+// CHECK: define {{.*}}@"\01-[MyData setData:]"
+// CHECK: [[DATA:%.*]] = alloca %struct.Data
+// CHECK: call %struct.Data* @_ZN4DataD1Ev(%struct.Data* [[DATA]]){{.*}}, !dbg [[LOC:![0-9]+]]
+// CHECK-NEXT: ret void
+
+// [[LOC]] = !DILocation(line: 0
+
+@interface MyData : NSObject
+struct Data {
+    NSString *name;
+};
+@property struct Data data;
+@end
+@implementation MyData
+@end
diff --git a/clang/test/SemaCXX/exceptions-seh.cpp b/clang/test/SemaCXX/exceptions-seh.cpp
--- a/clang/test/SemaCXX/exceptions-seh.cpp
+++ b/clang/test/SemaCXX/exceptions-seh.cpp
@@ -39,14 +39,13 @@
 }
 
 #if __cplusplus < 201103L
-// FIXME: Diagnose this case. For now we produce undef in codegen.
 template <typename T, T FN()>
 T func_template() {
-  return FN();
+  return FN(); // expected-error 2{{builtin functions must be directly called}}
 }
 void inject_builtins() {
-  func_template<void *, __exception_info>();
-  func_template<unsigned long, __exception_code>();
+  func_template<void *, __exception_info>(); // expected-note {{instantiation of}}
+  func_template<unsigned long, __exception_code>(); // expected-note {{instantiation of}}
 }
 #endif
 
diff --git a/clang/test/SemaCXX/warn-bool-conversion.cpp b/clang/test/SemaCXX/warn-bool-conversion.cpp
--- a/clang/test/SemaCXX/warn-bool-conversion.cpp
+++ b/clang/test/SemaCXX/warn-bool-conversion.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,expected-cxx11 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++98 %s
-// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,expected-cxx11 -std=c++11 %s
 
 namespace BooleanFalse {
 int* j = false;
@@ -192,3 +192,23 @@
     // expected-warning@-1{{address of 'x' will always evaluate to 'true'}}
   }
 }
+
+namespace Template {
+  // FIXME: These cases should not warn.
+  template<int *p> void f() { if (p) {} } // expected-warning 2{{will always evaluate to 'true'}} expected-cxx11-warning {{implicit conversion of nullptr}}
+  template<int (*p)[3]> void g() { if (p) {} } // expected-warning 2{{will always evaluate to 'true'}} expected-cxx11-warning {{implicit conversion of nullptr}}
+  template<int (*p)()> void h() { if (p) {} }
+
+  int a, b[3], c[3][3], d();
+  template void f<&a>(); // expected-note {{instantiation of}}
+  template void f<b>(); // expected-note {{instantiation of}}
+#if __cplusplus >= 201103L
+  template void f<(int*)nullptr>(); // expected-note {{instantiation of}}
+#endif
+  template void g<&b>(); // expected-note {{instantiation of}}
+  template void g<c>(); // expected-note {{instantiation of}}
+#if __cplusplus >= 201103L
+  template void g<(int(*)[3])nullptr>(); // expected-note {{instantiation of}}
+#endif
+  template void h<d>();
+}
diff --git a/clang/test/SemaObjC/default-synthesize-sourceloc.m b/clang/test/SemaObjC/default-synthesize-sourceloc.m
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaObjC/default-synthesize-sourceloc.m
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -ast-dump %s | FileCheck %s
+
+// Test that accessor stubs for default-synthesized ObjC accessors
+// have a valid source location.
+
+__attribute__((objc_root_class))
+@interface NSObject
++ (id)alloc;
+@end
+
+@interface NSString : NSObject
+@end
+
+@interface MyData : NSObject
+struct Data {
+    NSString *name;
+};
+@property struct Data data;
+@end
+// CHECK: ObjCImplementationDecl {{.*}}line:[[@LINE+2]]{{.*}} MyData
+// CHECK: ObjCMethodDecl {{.*}}col:23 implicit - setData: 'void'
+@implementation MyData
+@end
diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
--- a/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
+++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp
@@ -394,6 +394,21 @@
   Z<f, f, f>::Q q;
 }
 
+namespace QualConv {
+  int *X;
+  template<const int *const *P> void f() {
+    using T = decltype(P);
+    using T = const int* const*;
+  }
+  template void f<&X>();
+
+  template<const int *const &R> void g() {
+    using T = decltype(R);
+    using T = const int *const &;
+  }
+  template void g<(const int *const&)X>();
+}
+
 namespace FunctionConversion {
   struct a { void c(char *) noexcept; };
   template<void (a::*f)(char*)> void g() {
@@ -401,4 +416,21 @@
     using T = void (a::*)(char*); // (not 'noexcept')
   }
   template void g<&a::c>();
+
+  void c() noexcept;
+  template<void (*p)()> void h() {
+    using T = decltype(p);
+    using T = void (*)(); // (not 'noexcept')
+  }
+  template void h<&c>();
+}
+
+namespace VoidPtr {
+  // Note, this is an extension in C++17 but valid in C++20.
+  template<void *P> void f() {
+    using T = decltype(P);
+    using T = void*;
+  }
+  int n;
+  template void f<(void*)&n>();
 }
diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -42,7 +42,6 @@
   RecursiveASTVisitorTests/LambdaDefaultCapture.cpp
   RecursiveASTVisitorTests/LambdaExpr.cpp
   RecursiveASTVisitorTests/LambdaTemplateParams.cpp
-  RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
   RecursiveASTVisitorTests/NestedNameSpecifiers.cpp
   RecursiveASTVisitorTests/ParenExpr.cpp
   RecursiveASTVisitorTests/TemplateArgumentLocTraverser.cpp
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
deleted file mode 100644
--- a/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- unittest/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "TestVisitor.h"
-
-using namespace clang;
-
-namespace {
-
-class MemberPointerTypeLocVisitor
-    : public ExpectedLocationVisitor<MemberPointerTypeLocVisitor> {
-public:
-  bool VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
-    if (!TL)
-      return true;
-    Match(TL.getDecl()->getName(), TL.getNameLoc());
-    return true;
-  }
-  bool VisitRecordTypeLoc(RecordTypeLoc RTL) {
-    if (!RTL)
-      return true;
-    Match(RTL.getDecl()->getName(), RTL.getNameLoc());
-    return true;
-  }
-};
-
-TEST(RecursiveASTVisitor, VisitTypeLocInMemberPointerTypeLoc) {
-  MemberPointerTypeLocVisitor Visitor;
-  Visitor.ExpectMatch("Bar", 4, 36);
-  Visitor.ExpectMatch("T", 7, 23);
-  EXPECT_TRUE(Visitor.runOver(R"cpp(
-     class Bar { void func(int); };
-     class Foo {
-       void bind(const char*, void(Bar::*Foo)(int)) {}
-
-       template<typename T>
-       void test(void(T::*Foo)());
-     };
-  )cpp"));
-}
-
-} // end anonymous namespace
diff --git a/clang/utils/creduce-clang-crash.py b/clang/utils/creduce-clang-crash.py
--- a/clang/utils/creduce-clang-crash.py
+++ b/clang/utils/creduce-clang-crash.py
@@ -30,6 +30,7 @@
     print(*args, **kwargs)
 
 def check_file(fname):
+  fname = os.path.normpath(fname)
   if not os.path.isfile(fname):
     sys.exit("ERROR: %s does not exist" % (fname))
   return fname
@@ -40,6 +41,8 @@
   or absolute path to cmd_dir/cmd_name.
   """
   if cmd_path:
+    # Make the path absolute so the creduce test can be run from any directory.
+    cmd_path = os.path.abspath(cmd_path)
     cmd = find_executable(cmd_path)
     if cmd:
       return cmd
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/float-display/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/float-display/Makefile
--- a/lldb/packages/Python/lldbsuite/test/functionalities/float-display/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/float-display/Makefile
@@ -1,3 +1,2 @@
-LEVEL = ../../make
 C_SOURCES := main.c
-include $(LEVEL)/Makefile.rules
+include Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile
--- a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile
@@ -1,5 +1,3 @@
-LEVEL = ../../../make
 CXX_SOURCES := main.cpp
-include $(LEVEL)/Makefile.rules
 CXXFLAGS_EXTRAS := -O2 -glldb -Xclang -femit-debug-entry-values
 include Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/thread_local/Makefile b/lldb/packages/Python/lldbsuite/test/lang/cpp/thread_local/Makefile
--- a/lldb/packages/Python/lldbsuite/test/lang/cpp/thread_local/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/thread_local/Makefile
@@ -1,3 +1,2 @@
-LEVEL = ../../../make
 CXX_SOURCES := main.cpp
-include $(LEVEL)/Makefile.rules
+include Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile b/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile
--- a/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile
@@ -1,5 +1,3 @@
-LEVEL = ../../make
-
 C_SOURCES := main.c
 LD_EXTRAS := -L. -lfoo
 
@@ -12,4 +10,4 @@
 	$(MAKE) -f $(MAKEFILE_RULES) \
 		DYLIB_ONLY=YES DYLIB_NAME=foo DYLIB_C_SOURCES=foo.c
 
-include $(LEVEL)/Makefile.rules
+include Makefile.rules
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/DebugInfo/GSYM/StringTable.h"
 #include <tuple>
@@ -21,6 +22,7 @@
 class raw_ostream;
 namespace gsym {
 
+class GsymReader;
 /// Function information in GSYM files encodes information for one contiguous
 /// address range. If a function has discontiguous address ranges, they will
 /// need to be encoded using multiple FunctionInfo objects.
@@ -140,6 +142,33 @@
   /// function info that was successfully written into the stream.
   llvm::Expected<uint64_t> encode(FileWriter &O) const;
 
+
+  /// Lookup an address within a FunctionInfo object's data stream.
+  ///
+  /// Instead of decoding an entire FunctionInfo object when doing lookups,
+  /// we can decode only the information we need from the FunctionInfo's data
+  /// for the specific address. The lookup result information is returned as
+  /// a LookupResult.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in information in the returned result.
+  ///
+  /// \param FuncAddr The function start address decoded from the GsymReader.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \returns An LookupResult or an error describing the issue that was
+  /// encountered during decoding. An error should only be returned if the
+  /// address is not contained in the FunctionInfo or if the data is corrupted.
+  static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
+                                             const GsymReader &GR,
+                                             uint64_t FuncAddr,
+                                             uint64_t Addr);
+
   uint64_t startAddress() const { return Range.Start; }
   uint64_t endAddress() const { return Range.End; }
   uint64_t size() const { return Range.size(); }
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -1,9 +1,8 @@
 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -94,28 +93,45 @@
 
   /// Get the full function info for an address.
   ///
+  /// This should be called when a client will store a copy of the complete
+  /// FunctionInfo for a given address. For one off lookups, use the lookup()
+  /// function below.
+  ///
+  /// Symbolication server processes might want to parse the entire function
+  /// info for a given address and cache it if the process stays around to
+  /// service many symbolication addresses, like for parsing profiling
+  /// information.
+  ///
   /// \param Addr A virtual address from the orignal object file to lookup.
+  ///
   /// \returns An expected FunctionInfo that contains the function info object
   /// or an error object that indicates reason for failing to lookup the
-  /// address,
+  /// address.
   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
 
+  /// Lookup an address in the a GSYM.
+  ///
+  /// Lookup just the information needed for a specific address \a Addr. This
+  /// function is faster that calling getFunctionInfo() as it will only return
+  /// information that pertains to \a Addr and allows the parsing to skip any
+  /// extra information encoded for other addresses. For example the line table
+  /// parsing can stop when a matching LineEntry has been fouhnd, and the
+  /// InlineInfo can stop parsing early once a match has been found and also
+  /// skip information that doesn't match. This avoids memory allocations and
+  /// is much faster for lookups.
+  ///
+  /// \param Addr A virtual address from the orignal object file to lookup.
+  /// \returns An expected LookupResult that contains only the information
+  /// needed for the current address, or an error object that indicates reason
+  /// for failing to lookup the address.
+  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+
   /// Get a string from the string table.
   ///
   /// \param Offset The string table offset for the string to retrieve.
   /// \returns The string from the strin table.
   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
 
-protected:
-  /// Gets an address from the address table.
-  ///
-  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
-  ///
-  /// \param Index A index into the address table.
-  /// \returns A resolved virtual address for adddress in the address table
-  /// or llvm::None if Index is out of bounds.
-  Optional<uint64_t> getAddress(size_t Index) const;
-
   /// Get the a file entry for the suppplied file index.
   ///
   /// Used to convert any file indexes in the FunctionInfo data back into
@@ -131,6 +147,16 @@
     return llvm::None;
   }
 
+protected:
+  /// Gets an address from the address table.
+  ///
+  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
+  ///
+  /// \param Index A index into the address table.
+  /// \returns A resolved virtual address for adddress in the address table
+  /// or llvm::None if Index is out of bounds.
+  Optional<uint64_t> getAddress(size_t Index) const;
+
   /// Get an appropriate address info offsets array.
   ///
   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
--- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -10,6 +10,8 @@
 #define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/Support/Error.h"
 #include <stdint.h>
@@ -21,6 +23,7 @@
 
 namespace gsym {
 
+class GsymReader;
 /// Inline information stores the name of the inline function along with
 /// an array of address ranges. It also stores the call file and call line
 /// that called this inline function. This allows us to unwind inline call
@@ -74,6 +77,52 @@
 
   using InlineArray = std::vector<const InlineInfo *>;
 
+  /// Lookup a single address within the inline info data.
+  ///
+  /// Clients have the option to decode an entire InlineInfo object (using
+  /// InlineInfo::decode() ) or just find the matching inline info using this
+  /// function. The benefit of using this function is that only the information
+  /// needed for the lookup will be extracted, other info can be skipped and
+  /// parsing can stop as soon as the deepest match is found. This allows
+  /// symbolication tools to be fast and efficient and avoid allocation costs
+  /// when doing lookups.
+  ///
+  /// This function will augment the SourceLocations array \a SrcLocs with any
+  /// inline information that pertains to \a Addr. If no inline information
+  /// exists for \a Addr, then \a SrcLocs will be left untouched. If there is
+  /// inline information for \a Addr, then \a SrcLocs will be modifiied to
+  /// contain the deepest most inline function's SourceLocation at index zero
+  /// in the array and proceed up the the concrete function source file and
+  /// line at the end of the array.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in the source locations.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the LineTable object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the line table.
+  /// This will be the FunctionInfo's start address and will be used to
+  /// decode the correct addresses for the inline information.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \param SrcLocs The inline source locations that matches \a Addr. This
+  ///                array must be initialized with the matching line entry
+  ///                from the line table upon entry. The name of the concrete
+  ///                function must be supplied since it will get pushed to
+  ///                the last SourceLocation entry and the inline information
+  ///                will fill in the source file and line from the inline
+  ///                information.
+  ///
+  /// \returns An error if the inline information is corrupt, or
+  ///          Error::success() for all other cases, even when no information
+  ///          is added to \a SrcLocs.
+  static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
+                            uint64_t BaseAddr, uint64_t Addr,
+                            SourceLocations &SrcLocs);
+
   /// Lookup an address in the InlineInfo object
   ///
   /// This function is used to symbolicate an inline call stack and can
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
--- a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
@@ -119,8 +119,25 @@
   typedef std::vector<gsym::LineEntry> Collection;
   Collection Lines; ///< All line entries in the line table.
 public:
-  static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr,
-                          uint64_t Addr);
+  /// Lookup a single address within a line table's data.
+  ///
+  /// Clients have the option to decode an entire line table using
+  /// LineTable::decode() or just find a single matching entry using this
+  /// function. The benefit of using this function is that parsed LineEntry
+  /// objects that do not match will not be stored in an array. This will avoid
+  /// memory allocation costs and parsing can stop once a match has been found.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the LineTable object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the line table.
+  /// This will be the FunctionInfo's start address and will be used to
+  /// initialize the line table row prior to parsing any opcodes.
+  ///
+  /// \returns An LineEntry object if a match is found, error otherwise.
+  static Expected<LineEntry> lookup(DataExtractor &Data, uint64_t BaseAddr,
+                                    uint64_t Addr);
 
   /// Decode an LineTable object from a binary data stream.
   ///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -0,0 +1,61 @@
+//===- LookupResult.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/ADT/StringRef.h"
+#include <inttypes.h>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+struct FileEntry;
+
+struct SourceLocation {
+  StringRef Name; ///< Function or symbol name.
+  StringRef Dir; ///< Line entry source file directory path.
+  StringRef Base; ///< Line entry source file basename.
+  uint32_t Line = 0; ///< Source file line number.
+};
+
+inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) {
+  return LHS.Name == RHS.Name && LHS.Dir == RHS.Dir &&
+         LHS.Base == RHS.Base && LHS.Line == RHS.Line;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SourceLocation &R);
+
+using SourceLocations = std::vector<SourceLocation>;
+
+
+struct LookupResult {
+  uint64_t LookupAddr = 0; ///< The address that this lookup pertains to.
+  AddressRange FuncRange; ///< The concrete function address range.
+  StringRef FuncName; ///< The concrete function name that contains LookupAddr.
+  /// The source locations that match this address. This information will only
+  /// be filled in if the FunctionInfo contains a line table. If an address is
+  /// for a concrete function with no inlined functions, this array will have
+  /// one entry. If an address points to an inline function, there will be one
+  /// SourceLocation for each inlined function with the last entry pointing to
+  /// the concrete function itself. This allows one address to generate
+  /// multiple locations and allows unwinding of inline call stacks. The
+  /// deepest inline function will appear at index zero in the source locations
+  /// array, and the concrete function will appear at the end of the array.
+  SourceLocations Locations;
+  std::string getSourceFile(uint32_t Index) const;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/Range.h b/llvm/include/llvm/DebugInfo/GSYM/Range.h
--- a/llvm/include/llvm/DebugInfo/GSYM/Range.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/Range.h
@@ -61,6 +61,14 @@
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an address range object in the specified data a the specified
+  /// offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  static void skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -100,6 +108,16 @@
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an address range object in the specified data a the specified
+  /// offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  ///
+  /// \returns The number of address ranges that were skipped.
+  static uint64_t skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
diff --git a/llvm/include/llvm/IR/ValueHandle.h b/llvm/include/llvm/IR/ValueHandle.h
--- a/llvm/include/llvm/IR/ValueHandle.h
+++ b/llvm/include/llvm/IR/ValueHandle.h
@@ -264,6 +264,7 @@
 #else
   AssertingVH() : ThePtr(nullptr) {}
   AssertingVH(ValueTy *P) : ThePtr(GetAsValue(P)) {}
+  AssertingVH(const AssertingVH<ValueTy> &) = default;
 #endif
 
   operator ValueTy*() const {
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -387,7 +387,10 @@
     if (FS != iter->second.end())
       return &FS->second;
     // If we cannot find exact match of the callee name, return the FS with
-    // the max total count.
+    // the max total count. Only do this when CalleeName is not provided, 
+    // i.e., only for indirect calls.
+    if (!CalleeName.empty()) 
+      return nullptr;
     uint64_t MaxTotalSamples = 0;
     const FunctionSamples *R = nullptr;
     for (const auto &NameFS : iter->second)
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -358,6 +358,15 @@
     return getSamplesFor(CanonName);
   }
 
+  /// Return the samples collected for function \p F, create empty
+  /// FunctionSamples if it doesn't exist.
+  FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+    std::string FGUID;
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    CanonName = getRepInFormat(CanonName, getFormat(), FGUID);
+    return &Profiles[CanonName];
+  }
+
   /// Return the samples collected for function \p F.
   virtual FunctionSamples *getSamplesFor(StringRef Fname) {
     if (Remapper) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1000,6 +1000,7 @@
   // Create the symbol that points to the first entry following the debug
   // address table (.debug_addr) header.
   AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
+  DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
 
   for (DICompileUnit *CUNode : M->debug_compile_units()) {
     // FIXME: Move local imported entities into a list attached to the
@@ -1163,7 +1164,6 @@
         U.addRnglistsBase();
 
       if (!DebugLocs.getLists().empty()) {
-        DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
         if (!useSplitDwarf())
           U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
                             DebugLocs.getSym(),
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1733,8 +1733,15 @@
       if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
         if (!DstInt->hasSubRanges()) {
           BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
-          LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
-          DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
+          LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+          LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+          LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+          DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+          // The unused lanes are just empty live-ranges at this point.
+          // It is the caller responsibility to set the proper
+          // dead segments if there is an actual dead def of the
+          // unused lanes. This may happen with rematerialization.
+          DstInt->createSubRange(Allocator, UnusedLanes);
         }
         SlotIndex MIIdx = UseMI->isDebugValue()
                               ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -6,6 +6,7 @@
   GsymReader.cpp
   InlineInfo.cpp
   LineTable.cpp
+  LookupResult.cpp
   Range.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
@@ -145,3 +146,104 @@
   O.writeU32(0);
   return FuncInfoOffset;
 }
+
+
+llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
+                                                  const GsymReader &GR,
+                                                  uint64_t FuncAddr,
+                                                  uint64_t Addr) {
+  LookupResult LR;
+  LR.LookupAddr = Addr;
+  LR.FuncRange.Start = FuncAddr;
+  uint64_t Offset = 0;
+  LR.FuncRange.End = FuncAddr + Data.getU32(&Offset);
+  uint32_t NameOffset = Data.getU32(&Offset);
+  // The "lookup" functions doesn't report errors as accurately as the "decode"
+  // function as it is meant to be fast. For more accurage errors we could call
+  // "decode".
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+                              "FunctionInfo data is truncated");
+  // This function will be called with the result of a binary search of the
+  // address table, we must still make sure the address does not fall into a
+  // gap between functions are after the last function.
+  if (Addr >= LR.FuncRange.End)
+    return createStringError(std::errc::io_error,
+        "address 0x%" PRIx64 " is not in GSYM", Addr);
+
+  if (NameOffset == 0)
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000",
+        Offset - 4);
+  LR.FuncName = GR.getString(NameOffset);
+  bool Done = false;
+  Optional<LineEntry> LineEntry;
+  Optional<DataExtractor> InlineInfoData;
+  while (!Done) {
+    if (!Data.isValidOffsetForDataOfSize(Offset, 8))
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    const uint32_t InfoType = Data.getU32(&Offset);
+    const uint32_t InfoLength = Data.getU32(&Offset);
+    const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength);
+    if (InfoLength != InfoBytes.size())
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
+                           Data.getAddressSize());
+    switch (InfoType) {
+      case InfoType::EndOfList:
+        Done = true;
+        break;
+
+      case InfoType::LineTableInfo:
+        if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr))
+          LineEntry = ExpectedLE.get();
+        else
+          return ExpectedLE.takeError();
+        break;
+
+      case InfoType::InlineInfo:
+        // We will parse the inline info after our line table, but only if
+        // we have a line entry.
+        InlineInfoData = InfoData;
+        break;
+
+      default:
+        break;
+    }
+    Offset += InfoLength;
+  }
+
+  if (!LineEntry) {
+    // We don't have a valid line entry for our address, fill in our source
+    // location as best we can and return.
+    SourceLocation SrcLoc;
+    SrcLoc.Name = LR.FuncName;
+    LR.Locations.push_back(SrcLoc);
+    return LR;
+  }
+
+  Optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File);
+  if (!LineEntryFile)
+    return createStringError(std::errc::invalid_argument,
+                              "failed to extract file[%" PRIu32 "]",
+                              LineEntry->File);
+
+  SourceLocation SrcLoc;
+  SrcLoc.Name = LR.FuncName;
+  SrcLoc.Dir = GR.getString(LineEntryFile->Dir);
+  SrcLoc.Base = GR.getString(LineEntryFile->Base);
+  SrcLoc.Line = LineEntry->Line;
+  LR.Locations.push_back(SrcLoc);
+  // If we don't have inline information, we are done.
+  if (!InlineInfoData)
+    return LR;
+  // We have inline information. Try to augment the lookup result with this
+  // data.
+  llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr,
+                                       LR.Locations);
+  if (Err)
+    return std::move(Err);
+  return LR;
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -1,9 +1,8 @@
 //===- GsymReader.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -263,3 +262,18 @@
                            "failed to extract address[%" PRIu64 "]",
                            *AddressIndex);
 }
+
+llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+  Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
+  if (!AddressIndex)
+    return AddressIndex.takeError();
+  // Address info offsets size should have been checked in parse().
+  assert(*AddressIndex < AddrInfoOffsets.size());
+  auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
+  DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
+  if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex))
+    return FunctionInfo::lookup(Data, *this, *OptAddr, Addr);
+  return createStringError(std::errc::invalid_argument,
+                           "failed to extract address[%" PRIu64 "]",
+                           *AddressIndex);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
--- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -1,14 +1,14 @@
 //===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/GSYM/FileEntry.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
 #include <algorithm>
@@ -60,6 +60,108 @@
   return llvm::None;
 }
 
+/// Skip an InlineInfo object in the specified data at the specified offset.
+///
+/// Used during the InlineInfo::lookup() call to quickly skip child InlineInfo
+/// objects where the addres ranges isn't contained in the InlineInfo object
+/// or its children. This avoids allocations by not appending child InlineInfo
+/// objects to the InlineInfo::Children array.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+///
+/// \param SkippedRanges If true, address ranges have already been skipped.
+
+static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
+  if (!SkippedRanges) {
+    if (AddressRanges::skip(Data, Offset) == 0)
+      return false;
+  }
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Data.getU32(&Offset); // Skip Inline.Name.
+  Data.getULEB128(&Offset); // Skip Inline.CallFile.
+  Data.getULEB128(&Offset); // Skip Inline.CallLine.
+  if (HasChildren) {
+    while (skip(Data, Offset, false /* SkippedRanges */))
+      /* Do nothing */;
+  }
+  // We skipped a valid InlineInfo.
+  return true;
+}
+
+/// A Lookup helper functions.
+///
+/// Used during the InlineInfo::lookup() call to quickly only parse an
+/// InlineInfo object if the address falls within this object. This avoids
+/// allocations by not appending child InlineInfo objects to the
+/// InlineInfo::Children array and also skips any InlineInfo objects that do
+/// not contain the address we are looking up.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+///
+/// \param BaseAddr The address that the relative address range offsets are
+///                 relative to.
+
+static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
+                   uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
+                   llvm::Error &Err) {
+  InlineInfo Inline;
+  Inline.Ranges.decode(Data, BaseAddr, Offset);
+  if (Inline.Ranges.empty())
+    return true;
+  // Check if the address is contained within the inline information, and if
+  // not, quickly skip this InlineInfo object and all its children.
+  if (!Inline.Ranges.contains(Addr)) {
+    skip(Data, Offset, true /* SkippedRanges */);
+    return false;
+  }
+
+  // The address range is contained within this InlineInfo, add the source
+  // location for this InlineInfo and any children that contain the address.
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Inline.Name = Data.getU32(&Offset);
+  Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+  Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+  if (HasChildren) {
+    // Child address ranges are encoded relative to the first address in the
+    // parent InlineInfo object.
+    const auto ChildBaseAddr = Inline.Ranges[0].Start;
+    bool Done = false;
+    while (!Done)
+      Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err);
+  }
+
+  Optional<FileEntry> CallFile = GR.getFile(Inline.CallFile);
+  if (!CallFile) {
+    Err = createStringError(std::errc::invalid_argument,
+                            "failed to extract file[%" PRIu32 "]",
+                            Inline.CallFile);
+    return false;
+  }
+
+  SourceLocation SrcLoc;
+  SrcLoc.Name = SrcLocs.back().Name;
+  SrcLoc.Dir = GR.getString(CallFile->Dir);
+  SrcLoc.Base = GR.getString(CallFile->Base);
+  SrcLoc.Line = Inline.CallLine;
+  SrcLocs.back().Name = GR.getString(Inline.Name);
+  SrcLocs.push_back(SrcLoc);
+  return true;
+}
+
+llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
+                               uint64_t BaseAddr, uint64_t Addr,
+                               SourceLocations &SrcLocs) {
+  // Call our recursive helper function starting at offset zero.
+  uint64_t Offset = 0;
+  llvm::Error Err = Error::success();
+  ::lookup(GR, Data, Offset, BaseAddr, Addr, SrcLocs, Err);
+  return Err;
+}
+
 /// Decode an InlineInfo in Data at the specified offset.
 ///
 /// A local helper function to decode InlineInfo objects. This function is
diff --git a/llvm/lib/DebugInfo/GSYM/LineTable.cpp b/llvm/lib/DebugInfo/GSYM/LineTable.cpp
--- a/llvm/lib/DebugInfo/GSYM/LineTable.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LineTable.cpp
@@ -262,8 +262,8 @@
 // Parse the line table on the fly and find the row we are looking for.
 // We will need to determine if we need to cache the line table by calling
 // LineTable::parseAllEntries(...) or just call this function each time.
-// There is a CPU vs memory tradeoff we will need to determine.
-LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
+// There is a CPU vs memory tradeoff we will need to determined.
+Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
   LineEntry Result;
   llvm::Error Err = parse(Data, BaseAddr,
                           [Addr, &Result](const LineEntry &Row) -> bool {
@@ -277,7 +277,13 @@
     }
     return true; // Keep parsing till we find the right row.
   });
-  return Result;
+  if (Err)
+    return std::move(Err);
+  if (Result.isValid())
+    return Result;
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " is not in the line table",
+                           Addr);
 }
 
 raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) {
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -0,0 +1,68 @@
+//===- LookupResult.cpp -------------------------------------------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace gsym;
+
+std::string LookupResult::getSourceFile(uint32_t Index) const {
+  std::string Fullpath;
+  if (Index < Locations.size()) {
+    if (!Locations[Index].Dir.empty()) {
+      if (Locations[Index].Base.empty()) {
+        Fullpath = Locations[Index].Dir;
+      } else {
+        llvm::SmallString<64> Storage;
+        llvm::sys::path::append(Storage, Locations[Index].Dir,
+                                Locations[Index].Base);
+        Fullpath.assign(Storage.begin(), Storage.end());
+      }
+    } else if (!Locations[Index].Base.empty())
+      Fullpath = Locations[Index].Base;
+  }
+  return Fullpath;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) {
+  OS << SL.Name << " @ ";
+  if (!SL.Dir.empty()) {
+    OS << SL.Dir;
+    if (SL.Dir.contains('\\') and not SL.Dir.contains('/'))
+      OS << '\\';
+    else
+      OS << '/';
+  }
+  if (SL.Base.empty())
+    OS << "<invalid-file>";
+  else
+    OS << SL.Base;
+  OS << ':' << SL.Line;
+  return OS;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
+  OS << HEX64(LR.LookupAddr) << ": ";
+  auto NumLocations = LR.Locations.size();
+  for (size_t I = 0; I < NumLocations; ++I) {
+    if (I > 0) {
+      OS << '\n';
+      OS.indent(20);
+    }
+    const bool IsInlined = I + 1 != NumLocations;
+    OS << LR.Locations[I];
+    if (IsInlined)
+      OS << " [inlined]";
+  }
+  OS << '\n';
+  return OS;
+}
diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp
--- a/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -100,3 +100,15 @@
   for (auto &Range : Ranges)
     Range.decode(Data, BaseAddr, Offset);
 }
+
+void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) {
+  Data.getULEB128(&Offset);
+  Data.getULEB128(&Offset);
+}
+
+uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) {
+  uint64_t NumRanges = Data.getULEB128(&Offset);
+  for (uint64_t I=0; I<NumRanges; ++I)
+    AddressRange::skip(Data, Offset);
+  return NumRanges;
+}
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -26,6 +27,10 @@
 STATISTIC(WriteOnlyLiveGVars,
           "Number of live global variables marked write only");
 
+static cl::opt<bool> PropagateAttrs("propagate-attrs", cl::init(true),
+                                    cl::Hidden,
+                                    cl::desc("Propagate attributes in index"));
+
 FunctionSummary FunctionSummary::ExternalNode =
     FunctionSummary::makeDummyFunctionSummary({});
 
@@ -157,6 +162,8 @@
 // See internalizeGVsAfterImport.
 void ModuleSummaryIndex::propagateAttributes(
     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+  if (!PropagateAttrs)
+    return;
   for (auto &P : *this)
     for (auto &S : P.second.SummaryList) {
       if (!isGlobalValueLive(S.get()))
@@ -183,6 +190,7 @@
         }
       propagateAttributesToRefs(S.get());
     }
+  setWithAttributePropagation();
   if (llvm::AreStatisticsEnabled())
     for (auto &P : *this)
       if (P.second.SummaryList.size())
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5219,10 +5219,8 @@
     SelectCode(Res.getNode());
     return;
   }
-  case ISD::STRICT_FADD:
-  case ISD::STRICT_FSUB:
   case ISD::STRICT_FP_ROUND: {
-    // X87 instructions has enabled these strict fp operation.
+    // X87 instructions has enabled this strict fp operation.
     bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 ||
                      Node->getOperand(1).getSimpleValueType() == MVT::f80;
     if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87()))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -593,8 +593,6 @@
       setOperationAction(ISD::FSINCOS, VT, Expand);
 
       // Handle constrained floating-point operations of scalar.
-      setOperationAction(ISD::STRICT_FMUL     , VT, Legal);
-      setOperationAction(ISD::STRICT_FDIV     , VT, Legal);
       setOperationAction(ISD::STRICT_FSQRT    , VT, Legal);
       setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
       // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
@@ -623,13 +621,15 @@
     } else // SSE immediates.
       addLegalFPImmediate(APFloat(+0.0)); // xorpd
   }
-
-  // FIXME: Mark these legal to prevent them from being expanded to a
-  // libcall in LegalizeDAG. They'll be mutated by X86ISelDAGToDAG::Select.
+  // Handle constrained floating-point operations of scalar.
   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
 
   // We don't support FMA.
   setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -864,6 +864,10 @@
     setOperationAction(ISD::STRICT_FADD,        MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FSUB,        MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FSUB,        MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL,        MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL,        MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV,        MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV,        MVT::v2f64, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1160,6 +1164,10 @@
     setOperationAction(ISD::STRICT_FADD,        MVT::v4f64, Legal);
     setOperationAction(ISD::STRICT_FSUB,        MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FSUB,        MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL,        MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL,        MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV,        MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV,        MVT::v4f64, Legal);
 
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1429,6 +1437,10 @@
     setOperationAction(ISD::STRICT_FADD,     MVT::v8f64, Legal);
     setOperationAction(ISD::STRICT_FSUB,     MVT::v16f32, Legal);
     setOperationAction(ISD::STRICT_FSUB,     MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL,     MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL,     MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV,     MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV,     MVT::v8f64, Legal);
 
     setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
     setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -5397,13 +5397,13 @@
                               NAME#"SD">,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
+defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                  SchedWriteFAddSizes, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
+defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                  SchedWriteFMulSizes, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                  SchedWriteFAddSizes, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                  SchedWriteFDivSizes, 0>;
 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                                SchedWriteFCmpSizes, 0>;
@@ -5555,16 +5555,16 @@
                                   EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
 }
 
-defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
+defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
                               SchedWriteFAddSizes, 1>,
             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
                               SchedWriteFMulSizes, 1>,
             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
                               SchedWriteFAddSizes>,
             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
                               SchedWriteFDivSizes>,
             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2667,18 +2667,18 @@
 }
 
 // Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
-           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
+           basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
            basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
-           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
+           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
            basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
 let isCommutable = 0 in {
-  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
-             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
+             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
              basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
-  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
-             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
+             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
              basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
   defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
              basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
@@ -2773,15 +2773,15 @@
   }
 }
 
-defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
 
-defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
 
 /// Unop Arithmetic
 /// In addition, we also have a special variant of the scalar form here to
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -901,19 +901,8 @@
     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
     bool ImportEnabled) {
   computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
-  if (ImportEnabled) {
+  if (ImportEnabled)
     Index.propagateAttributes(GUIDPreservedSymbols);
-  } else {
-    // If import is disabled we should drop read/write-only attribute
-    // from all summaries to prevent internalization.
-    for (auto &P : Index)
-      for (auto &S : P.second.SummaryList)
-        if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get())) {
-          GVS->setReadOnly(false);
-          GVS->setWriteOnly(false);
-        }
-  }
-  Index.setWithAttributePropagation();
 }
 
 /// Compute the set of summaries needed for a ThinLTO backend compilation of
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -33,6 +34,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -137,6 +140,16 @@
     cl::desc("For symbols in profile symbol list, regard their profiles to "
              "be accurate. It may be overriden by profile-sample-accurate. "));
 
+static cl::opt<bool> ProfileMergeInlinee(
+    "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+    cl::desc("Merge past inlinee's profile to outline version if sample "
+             "profile loader decided not to inline a call site."));
+
+static cl::opt<bool> ProfileTopDownLoad(
+    "sample-profile-top-down-load", cl::Hidden, cl::init(false),
+    cl::desc("Do profile annotation and inlining for functions in top-down "
+             "order of call graph during sample profile loading."));
+
 namespace {
 
 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -286,7 +299,7 @@
 
   bool doInitialization(Module &M);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM,
-                   ProfileSummaryInfo *_PSI);
+                   ProfileSummaryInfo *_PSI, CallGraph *CG);
 
   void dump() { Reader->dump(); }
 
@@ -318,6 +331,7 @@
   void propagateWeights(Function &F);
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(Function &F);
+  std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
   bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
   void computeDominanceAndLoopInfo(Function &F);
   void clearFunctionData();
@@ -1008,9 +1022,26 @@
     if (!Callee || Callee->isDeclaration())
       continue;
     const FunctionSamples *FS = Pair.getSecond();
-    auto pair =
-        notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
-    pair.first->second.entryCount += FS->getEntrySamples();
+    if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+      continue;
+    }
+
+    if (ProfileMergeInlinee) {
+      // Use entry samples as head samples during the merge, as inlinees
+      // don't have head samples.
+      assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
+      const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
+
+      // Note that we have to do the merge right after processing function.
+      // This allows OutlineFS's profile to be used for annotation during
+      // top-down processing of functions' annotation.
+      FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+      OutlineFS->merge(*FS);
+    } else {
+      auto pair =
+          notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+      pair.first->second.entryCount += FS->getEntrySamples();
+    }
   }
   return Changed;
 }
@@ -1674,6 +1705,33 @@
 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
                     "Sample Profile loader", false, false)
 
+std::vector<Function *>
+SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
+  std::vector<Function *> FunctionOrderList;
+  FunctionOrderList.reserve(M.size());
+
+  if (!ProfileTopDownLoad || CG == nullptr) {
+    for (Function &F : M)
+      if (!F.isDeclaration())
+        FunctionOrderList.push_back(&F);
+    return FunctionOrderList;
+  }
+
+  assert(&CG->getModule() == &M);
+  scc_iterator<CallGraph *> CGI = scc_begin(CG);
+  while (!CGI.isAtEnd()) {
+    for (CallGraphNode *node : *CGI) {
+      auto F = node->getFunction();
+      if (F && !F->isDeclaration())
+        FunctionOrderList.push_back(F);
+    }
+    ++CGI;
+  }
+
+  std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+  return FunctionOrderList;
+}
+
 bool SampleProfileLoader::doInitialization(Module &M) {
   auto &Ctx = M.getContext();
 
@@ -1711,7 +1769,7 @@
 }
 
 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
-                                      ProfileSummaryInfo *_PSI) {
+                                      ProfileSummaryInfo *_PSI, CallGraph *CG) {
   GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
   if (!ProfileIsValid)
     return false;
@@ -1746,11 +1804,11 @@
   }
 
   bool retval = false;
-  for (auto &F : M)
-    if (!F.isDeclaration()) {
-      clearFunctionData();
-      retval |= runOnFunction(F, AM);
-    }
+  for (auto F : buildFunctionOrder(M, CG)) {
+    assert(!F->isDeclaration());
+    clearFunctionData();
+    retval |= runOnFunction(*F, AM);
+  }
 
   // Account for cold calls not inlined....
   for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
@@ -1765,7 +1823,7 @@
   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
   ProfileSummaryInfo *PSI =
       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  return SampleLoader.runOnModule(M, nullptr, PSI);
+  return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
@@ -1849,7 +1907,8 @@
   SampleLoader.doInitialization(M);
 
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
-  if (!SampleLoader.runOnModule(M, &AM, PSI))
+  CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
+  if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
diff --git a/llvm/test/CodeGen/SystemZ/regcoal-subranges-update-remat.mir b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update-remat.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update-remat.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc  -mcpu=z13 -O3 -misched=ilpmin -systemz-subreg-liveness  -verify-machineinstrs -start-before simple-register-coalescing %s -mtriple s390x-ibm-linux -stop-after machine-scheduler -o - | FileCheck %s
+
+# Check that when the register coalescer rematerializes a register to set
+# only a sub register, it sets the subranges of the unused lanes as being dead
+# at the definition point.
+#
+# The way that test exercises that comes in two steps:
+# - First, we need the register coalescer to rematerialize something.
+#   In that test, %0 is rematerializable and will be rematerialized in
+#   %1 since %1 and %0 cannot be directly coalesced (they interfere).
+# - Second, we indirectly check that the subranges are valid for %1
+#   when, in the machine scheduler, we move the instructions that define %1
+#   closer to the return instruction (i.e., we move MSFI and the rematerialized
+#   definition of %0 (i.e., %1 = LGHI 25) down). When doing that displacement,
+#   the scheduler updates the live-ranges of %1. When the subrange for the
+#   unused lane (here the subrange for %1.subreg_h32) was not correct, the
+#   scheduler would hit an assertion or access some invalid memory location
+#   making the compiler crash.
+#
+# Bottom line, this test checks what was intended if at the end, both %0 and %1
+# are defined with `LGHI 25` and the instructions defining %1 are right before
+# the return instruction.
+#
+# PR41372
+---
+name:            main
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL
+    ; CHECK-LABEL: name: main
+    ; CHECK: [[LGHI:%[0-9]+]]:gr64bit = LGHI 25
+    ; CHECK: CHIMux [[LGHI]].subreg_l32, 0, implicit-def $cc
+    ; CHECK: [[LGHI1:%[0-9]+]]:gr64bit = LGHI 25
+    ; CHECK: undef [[LGHI1]].subreg_l32:gr64bit = MSFI [[LGHI1]].subreg_l32, -117440512
+    ; CHECK: Return implicit [[LGHI1]].subreg_l32
+    %0:gr64bit = LGHI 25
+    %1:gr32bit = COPY %0.subreg_l32
+    %1:gr32bit = MSFI %1, -117440512
+    %2:grx32bit = COPY %0.subreg_l32
+    CHIMux killed %2, 0, implicit-def $cc
+    %3:gr32bit = COPY killed %1
+    Return implicit %3
+...
diff --git a/llvm/test/CodeGen/X86/debug-loclists-lto.ll b/llvm/test/CodeGen/X86/debug-loclists-lto.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/debug-loclists-lto.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=x86_64-pc-linux -filetype=asm -function-sections < %s | FileCheck --implicit-check-not=loclists_table_base %s
+
+; CHECK: {{^}}.Lloclists_table_base0:
+; CHECK-NEXT: .long   .Ldebug_loc0-.Lloclists_table_base0
+; CHECK-NEXT: .long   .Ldebug_loc1-.Lloclists_table_base0
+; CHECK: .long   .Lloclists_table_base0  # DW_AT_loclists_base
+; CHECK: .long   .Lloclists_table_base0  # DW_AT_loclists_base
+
+; Function Attrs: uwtable
+define dso_local void @_Z2f2v() local_unnamed_addr #0 !dbg !15 {
+entry:
+  tail call void @_Z2f1v(), !dbg !19
+  call void @llvm.dbg.value(metadata i32 3, metadata !17, metadata !DIExpression()), !dbg !20
+  tail call void @_Z2f1v(), !dbg !21
+  ret void, !dbg !22
+}
+declare !dbg !4 dso_local void @_Z2f1v() local_unnamed_addr #1
+; Function Attrs: nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+; Function Attrs: uwtable
+define dso_local void @_Z2f3v() local_unnamed_addr #0 !dbg !23 {
+entry:
+  tail call void @_Z2f1v(), !dbg !26
+  call void @llvm.dbg.value(metadata i32 3, metadata !25, metadata !DIExpression()), !dbg !27
+  tail call void @_Z2f1v(), !dbg !28
+  ret void, !dbg !29
+}
+
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone speculatable willreturn }
+
+!llvm.dbg.cu = !{!0, !7}
+!llvm.ident = !{!11, !11}
+!llvm.module.flags = !{!12, !13, !14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project.git 9b962d83ece841e43fd2823375dc6ddc94c1b178)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
+!1 = !DIFile(filename: "loc1.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch", checksumkind: CSK_MD5, checksum: "3c96069dc8a3a1e7868038213ed0364a")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project.git 9b962d83ece841e43fd2823375dc6ddc94c1b178)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !9, nameTableKind: None)
+!8 = !DIFile(filename: "loc2.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch", checksumkind: CSK_MD5, checksum: "2d309df0c6f5d8ce7264cc7696738fa9")
+!9 = !{!10}
+!10 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !8, file: !8, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
+!11 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git 9b962d83ece841e43fd2823375dc6ddc94c1b178)"}
+!12 = !{i32 7, !"Dwarf Version", i32 5}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = !{i32 1, !"wchar_size", i32 4}
+!15 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !18)
+!18 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!19 = !DILocation(line: 4, column: 3, scope: !15)
+!20 = !DILocation(line: 0, scope: !15)
+!21 = !DILocation(line: 6, column: 3, scope: !15)
+!22 = !DILocation(line: 7, column: 1, scope: !15)
+!23 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !8, file: !8, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !24)
+!24 = !{!25}
+!25 = !DILocalVariable(name: "i", scope: !23, file: !8, line: 3, type: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !23)
+!27 = !DILocation(line: 0, scope: !23)
+!28 = !DILocation(line: 6, column: 3, scope: !23)
+!29 = !DILocation(line: 7, column: 1, scope: !23)
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
--- a/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
+++ b/llvm/test/CodeGen/X86/insert-prefetch-inline.afdo
@@ -1,4 +1,4 @@
 caller:0:0
- 2:sum:0
+ 2: sum:0
   3: 0 __prefetch_nta_0:23456
   3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
 
 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll
--- a/llvm/test/CodeGen/X86/vec-strict-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
 
 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
 declare <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float>, <8 x float>, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll
--- a/llvm/test/CodeGen/X86/vec-strict-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
 
 declare <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double>, <8 x double>, metadata, metadata)
 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -115,10 +115,10 @@
 ; CHECK-LABEL: constrained_vector_fdiv_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1]
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
-; CHECK-NEXT:    divpd %xmm2, %xmm0
 ; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0]
 ; CHECK-NEXT:    divpd %xmm2, %xmm1
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
+; CHECK-NEXT:    divpd %xmm2, %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_fdiv_v4f64:
@@ -507,10 +507,10 @@
 define <4 x double> @constrained_vector_fmul_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_fmul_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
-; CHECK-NEXT:    mulpd %xmm1, %xmm0
-; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
+; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0]
+; CHECK-NEXT:    mulpd %xmm0, %xmm1
+; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_fmul_v4f64:
@@ -644,10 +644,10 @@
 define <4 x double> @constrained_vector_fadd_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_fadd_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1]
-; CHECK-NEXT:    addpd %xmm1, %xmm0
-; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
+; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1]
+; CHECK-NEXT:    addpd %xmm0, %xmm1
+; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_fadd_v4f64:
@@ -784,10 +784,10 @@
 define <4 x double> @constrained_vector_fsub_v4f64() #0 {
 ; CHECK-LABEL: constrained_vector_fsub_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
-; CHECK-NEXT:    movapd %xmm1, %xmm0
-; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
+; CHECK-NEXT:    movapd %xmm0, %xmm1
 ; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX1-LABEL: constrained_vector_fsub_v4f64:
diff --git a/llvm/test/ThinLTO/X86/writeonly.ll b/llvm/test/ThinLTO/X86/writeonly.ll
--- a/llvm/test/ThinLTO/X86/writeonly.ll
+++ b/llvm/test/ThinLTO/X86/writeonly.ll
@@ -25,6 +25,13 @@
 ; OPTIMIZE-NEXT:   %2 = tail call i32 @rand()
 ; OPTIMIZE-NEXT:   ret i32 0
 
+; Confirm that with -propagate-attrs=false we no longer do write-only importing
+; RUN: llvm-lto -propagate-attrs=false -thinlto-action=import -exported-symbol=main  %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc -stats 2>&1 | FileCheck %s --check-prefix=STATS-NOPROP
+; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT-NOPROP
+; STATS-NOPROP-NOT: Number of live global variables marked write only
+; IMPORT-NOPROP: @gFoo.llvm.0 = available_externally
+; IMPORT-NOPROP-NEXT: @gBar = available_externally
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux-gnu"
 
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/einline.prof b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
--- a/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof
@@ -1,7 +1,7 @@
 _Z3foov:200:100
- 1: _Z3barv:0
+ 1: _ZL3barv:0
  2: no_inline:100
- 3: _Z3barv:100
+ 3: _ZL3barv:100
 recursive:200:100
  1: recursive:100
  2: recursive:100
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
--- a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
@@ -4,7 +4,7 @@
  1: direct_leaf_func:35000
   11: 3000
 test_cgscc_inline:63067:0
- 1: sample_loader_inlinee:1
+ 1: cgscc_inlinee:1
 cgscc_inlinee:3000:0
  1: direct_leaf_func:35000
   11: 3000
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-mergeprof.prof
@@ -0,0 +1,13 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:46
+  1: 23
+  2: _Z3subii:2
+   1: 2
+  3: 21
+
+_Z3sumii:11:22
+ 1: 11
+ 2: 10 _Z3subii:10
+ 3: 1
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown.prof
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown.prof
@@ -0,0 +1,10 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:50000
+  1: _Z3subii:0
+   1: 0
+
+_Z3sumii:6010:50000
+ 1: _Z3subii:60000
+   1: 9
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
@@ -0,0 +1,97 @@
+; Test we lose details of not inlined profile without '-sample-profile-merge-inlinee'
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -S | FileCheck -check-prefix=SCALE %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -S | FileCheck -check-prefix=SCALE %s
+
+; Test we properly merge not inlined profile properly with '-sample-profile-merge-inlinee'
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee -S | FileCheck -check-prefix=MERGE %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee -S | FileCheck -check-prefix=MERGE  %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+define i32 @main() !dbg !6 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  %tmp = load i32, i32* %i, align 4, !dbg !8
+  %tmp1 = load i32, i32* %s, align 4, !dbg !8
+  %call = call i32 @_Z3sumii(i32 %tmp, i32 %tmp1), !dbg !8
+; SCALE: call i32 @_Z3sumii
+; MERGE: call i32 @_Z3sumii
+  store i32 %call, i32* %s, align 4, !dbg !8
+  ret i32 0, !dbg !11
+}
+
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !12 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !13
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !13
+  %add = add nsw i32 %tmp, %tmp1, !dbg !13
+  %tmp2 = load i32, i32* %x.addr, align 4, !dbg !13
+  %tmp3 = load i32, i32* %y.addr, align 4, !dbg !13
+  %cmp1 = icmp ne i32 %tmp3, 100, !dbg !13
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !13
+
+if.then:                                          ; preds = %entry
+  %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !14
+  ret i32 %add, !dbg !14
+
+if.else:                                          ; preds = %entry
+  ret i32 %add, !dbg !15
+}
+
+define i32 @_Z3subii(i32 %x, i32 %y) !dbg !16 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !17
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !17
+  %add = sub nsw i32 %tmp, %tmp1, !dbg !17
+  ret i32 %add, !dbg !18
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 10, scope: !9)
+!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
+!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10)
+!11 = !DILocation(line: 12, scope: !6)
+!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 4, scope: !12)
+!14 = !DILocation(line: 5, scope: !12)
+!15 = !DILocation(line: 6, scope: !12)
+!16 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!17 = !DILocation(line: 20, scope: !16)
+!18 = !DILocation(line: 21, scope: !16)
+
+; SCALE: name: "sum"
+; SCALE-NEXT: {!"function_entry_count", i64 46}
+; SCALE: !{!"branch_weights", i32 11, i32 2}
+; SCALE: !{!"branch_weights", i64 20}
+; SCALE: name: "sub"
+; SCALE-NEXT: {!"function_entry_count", i64 -1}
+
+; MERGE: name: "sum"
+; MERGE-NEXT: {!"function_entry_count", i64 46}
+; MERGE: !{!"branch_weights", i32 11, i32 23}
+; MERGE: !{!"branch_weights", i32 10}
+; MERGE: name: "sub"
+; MERGE-NEXT: {!"function_entry_count", i64 3}
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/inline-topdown.ll b/llvm/test/Transforms/SampleProfile/inline-topdown.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-topdown.ll
@@ -0,0 +1,123 @@
+; Note that this needs new pass manager for now. Passing `-sample-profile-top-down-load` to legacy pass manager is a no-op.
+
+; Test we aren't doing specialization for inlining with default source order
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -S | FileCheck -check-prefix=DEFAULT %s
+
+; Test we specialize based on call path with context-sensitive profile while inlining with '-sample-profile-top-down-load'
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -S | FileCheck -check-prefix=TOPDOWN  %s
+
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
+  %add = add nsw i32 %tmp, %tmp1, !dbg !8
+  %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
+  %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
+  ret i32 %add, !dbg !8
+}
+
+define i32 @_Z3subii(i32 %x, i32 %y) !dbg !9 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !10
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
+  %add = sub nsw i32 %tmp, %tmp1, !dbg !10
+  ret i32 %add, !dbg !11
+}
+
+define i32 @main() !dbg !12 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !13
+  br label %while.cond, !dbg !14
+
+while.cond:                                       ; preds = %if.end, %entry
+  %tmp = load i32, i32* %i, align 4, !dbg !15
+  %inc = add nsw i32 %tmp, 1, !dbg !15
+  store i32 %inc, i32* %i, align 4, !dbg !15
+  %cmp = icmp slt i32 %tmp, 400000000, !dbg !15
+  br i1 %cmp, label %while.body, label %while.end, !dbg !15
+
+while.body:                                       ; preds = %while.cond
+  %tmp1 = load i32, i32* %i, align 4, !dbg !17
+  %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !17
+
+if.then:                                          ; preds = %while.body
+  %tmp2 = load i32, i32* %i, align 4, !dbg !19
+  %tmp3 = load i32, i32* %s, align 4, !dbg !19
+  %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
+  store i32 %call, i32* %s, align 4, !dbg !19
+  br label %if.end, !dbg !19
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !21
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !23
+
+while.end:                                        ; preds = %while.cond
+  %tmp4 = load i32, i32* %s, align 4, !dbg !25
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
+  ret i32 0, !dbg !26
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 4, scope: !6)
+!9 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!10 = !DILocation(line: 20, scope: !9)
+!11 = !DILocation(line: 21, scope: !9)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 8, scope: !12)
+!14 = !DILocation(line: 9, scope: !12)
+!15 = !DILocation(line: 9, scope: !16)
+!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
+!17 = !DILocation(line: 10, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
+!19 = !DILocation(line: 10, scope: !20)
+!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
+!21 = !DILocation(line: 10, scope: !22)
+!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
+!23 = !DILocation(line: 10, scope: !24)
+!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
+!25 = !DILocation(line: 11, scope: !12)
+!26 = !DILocation(line: 12, scope: !12)
+
+
+; DEFAULT: @_Z3sumii
+; DEFAULT-NOT: call i32 @_Z3subii
+; DEFAULT: @main()
+; DEFAULT-NOT: call i32 @_Z3subii
+
+; TOPDOWN: @_Z3sumii
+; TOPDOWN-NOT: call i32 @_Z3subii
+; TOPDOWN: @main()
+; TOPDOWN: call i32 @_Z3subii
\ No newline at end of file
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -20,8 +20,10 @@
 #include "llvm/DebugInfo/GSYM/StringTable.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Testing/Support/Error.h"
 
 #include "gtest/gtest.h"
+#include "gmock/gmock.h"
 #include <string>
 
 using namespace llvm;
@@ -1302,3 +1304,100 @@
                             "address 0x1030 not in GSYM");
   }
 }
+
+TEST(GSYMTest, TestGsymLookups) {
+  // Test creating a GSYM file with a function that has a inline information.
+  // Verify that lookups work correctly. Lookups do not decode the entire
+  // FunctionInfo or InlineInfo, they only extract information needed for the
+  // lookup to happen which avoids allocations which can slow down
+  // symbolication.
+  GsymCreator GC;
+  FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
+  const auto ByteOrder = support::endian::system_endianness();
+  FI.OptLineTable = LineTable();
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h");
+  FI.OptLineTable->push(LineEntry(0x1000, MainFileIndex, 5));
+  FI.OptLineTable->push(LineEntry(0x1010, FooFileIndex, 10));
+  FI.OptLineTable->push(LineEntry(0x1012, FooFileIndex, 20));
+  FI.OptLineTable->push(LineEntry(0x1014, FooFileIndex, 11));
+  FI.OptLineTable->push(LineEntry(0x1016, FooFileIndex, 30));
+  FI.OptLineTable->push(LineEntry(0x1018, FooFileIndex, 12));
+  FI.OptLineTable->push(LineEntry(0x1020, MainFileIndex, 8));
+  FI.Inline = InlineInfo();
+
+  FI.Inline->Name = GC.insertString("inline1");
+  FI.Inline->CallFile = MainFileIndex;
+  FI.Inline->CallLine = 6;
+  FI.Inline->Ranges.insert(AddressRange(0x1010, 0x1020));
+  InlineInfo Inline2;
+  Inline2.Name = GC.insertString("inline2");
+  Inline2.CallFile = FooFileIndex;
+  Inline2.CallLine = 33;
+  Inline2.Ranges.insert(AddressRange(0x1012, 0x1014));
+  FI.Inline->Children.emplace_back(Inline2);
+  InlineInfo Inline3;
+  Inline3.Name = GC.insertString("inline3");
+  Inline3.CallFile = FooFileIndex;
+  Inline3.CallLine = 35;
+  Inline3.Ranges.insert(AddressRange(0x1016, 0x1018));
+  FI.Inline->Children.emplace_back(Inline3);
+  GC.addFunctionInfo(std::move(FI));
+  Error FinalizeErr = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(FinalizeErr);
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_FALSE((bool)Err);
+  Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+  ASSERT_TRUE(bool(GR));
+
+  // Verify inline info is correct when doing lookups.
+  auto LR = GR->lookup(0x1000);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5}));
+  LR = GR->lookup(0x100F);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5}));
+
+  LR = GR->lookup(0x1010);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 10},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1012);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline2", "/tmp", "foo.h", 20},
+                         SourceLocation{"inline1", "/tmp", "foo.h", 33},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1014);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 11},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1016);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline3", "/tmp", "foo.h", 30},
+                         SourceLocation{"inline1", "/tmp", "foo.h", 35},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1018);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 12},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1020);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 8}));
+}
diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
--- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
@@ -37,6 +37,7 @@
   ]
   sources = [
     "APValue.cpp",
+    "ASTConcept.cpp",
     "ASTConsumer.cpp",
     "ASTContext.cpp",
     "ASTDiagnostic.cpp",
diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
--- a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn
@@ -54,7 +54,6 @@
     "RecursiveASTVisitorTests/LambdaDefaultCapture.cpp",
     "RecursiveASTVisitorTests/LambdaExpr.cpp",
     "RecursiveASTVisitorTests/LambdaTemplateParams.cpp",
-    "RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp",
     "RecursiveASTVisitorTests/NestedNameSpecifiers.cpp",
     "RecursiveASTVisitorTests/ParenExpr.cpp",
     "RecursiveASTVisitorTests/TemplateArgumentLocTraverser.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn
--- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn
@@ -12,6 +12,7 @@
     "Header.cpp",
     "InlineInfo.cpp",
     "LineTable.cpp",
+    "LookupResult.cpp",
     "Range.cpp",
   ]
 }
diff --git a/llvm/utils/unittest/googlemock/include/gmock/gmock-matchers.h b/llvm/utils/unittest/googlemock/include/gmock/gmock-matchers.h
--- a/llvm/utils/unittest/googlemock/include/gmock/gmock-matchers.h
+++ b/llvm/utils/unittest/googlemock/include/gmock/gmock-matchers.h
@@ -3589,6 +3589,8 @@
     GTEST_LOG_(FATAL) << "BoundSecondMatcher should never be assigned.";
   }
 
+  BoundSecondMatcher(const BoundSecondMatcher &) = default;
+
  private:
   template <typename T>
   class Impl : public MatcherInterface<T> {
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -22,6 +22,8 @@
 #define DEVICE __attribute__((device))
 #define INLINE inline DEVICE
 #define NOINLINE __attribute__((noinline)) DEVICE
+#define SHARED __attribute__((shared))
+#define ALIGN(N) __attribute__((aligned(N)))
 
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -77,7 +77,7 @@
   uint32_t nArgs;
 };
 
-extern __device__ __shared__ omptarget_nvptx_SharedArgs
+extern DEVICE SHARED omptarget_nvptx_SharedArgs
     omptarget_nvptx_globalArgs;
 
 // Data structure to keep in shared memory that traces the current slot, stack,
@@ -107,7 +107,7 @@
   void *DataEnd;
   char Data[DS_Slot_Size];
 };
-extern __device__ __shared__ DataSharingStateTy DataSharingState;
+extern DEVICE SHARED DataSharingStateTy DataSharingState;
 
 ////////////////////////////////////////////////////////////////////////////////
 // task ICV and (implicit & explicit) task state
@@ -259,9 +259,9 @@
       workDescrForActiveParallel; // one, ONLY for the active par
   uint64_t lastprivateIterBuffer;
 
-  __align__(16)
-      __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
-  __align__(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
+  ALIGN(16)
+  __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
+  ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -326,7 +326,7 @@
 /// Memory manager for statically allocated memory.
 class omptarget_nvptx_SimpleMemoryManager {
 private:
-  __align__(128) struct MemDataTy {
+  ALIGN(128) struct MemDataTy {
     volatile unsigned keys[OMP_STATE_COUNT];
   } MemData[MAX_SM];
 
@@ -345,20 +345,20 @@
 // global data tables
 ////////////////////////////////////////////////////////////////////////////////
 
-extern __device__ omptarget_nvptx_SimpleMemoryManager
+extern DEVICE omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
-extern __device__ __shared__ uint32_t usedMemIdx;
-extern __device__ __shared__ uint32_t usedSlotIdx;
-extern __device__ __shared__ uint8_t
+extern DEVICE SHARED uint32_t usedMemIdx;
+extern DEVICE SHARED uint32_t usedSlotIdx;
+extern DEVICE SHARED uint8_t
     parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-extern __device__ __shared__ uint16_t threadLimit;
-extern __device__ __shared__ uint16_t threadsInTeam;
-extern __device__ __shared__ uint16_t nThreads;
-extern __device__ __shared__
+extern DEVICE SHARED uint16_t threadLimit;
+extern DEVICE SHARED uint16_t threadsInTeam;
+extern DEVICE SHARED uint16_t nThreads;
+extern DEVICE SHARED
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
-extern __device__ __shared__ uint32_t execution_param;
-extern __device__ __shared__ void *ReductionScratchpadPtr;
+extern DEVICE SHARED uint32_t execution_param;
+extern DEVICE SHARED void *ReductionScratchpadPtr;
 
 ////////////////////////////////////////////////////////////////////////////////
 // work function (outlined parallel/simd functions) and arguments.
@@ -366,7 +366,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 typedef void *omptarget_nvptx_WorkFn;
-extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
+extern volatile DEVICE SHARED omptarget_nvptx_WorkFn
     omptarget_nvptx_workFn;
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -17,7 +17,7 @@
 // global data tables
 ////////////////////////////////////////////////////////////////////////////////
 
-extern __device__
+extern DEVICE
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
rename from openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
rename to openmp/libomptarget/deviceRTLs/common/src/parallel.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
@@ -1,4 +1,4 @@
-//===---- parallel.cu - NVPTX OpenMP parallel implementation ----- CUDA -*-===//
+//===---- parallel.cu - GPU OpenMP parallel implementation ------- CUDA -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
rename from openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
rename to openmp/libomptarget/deviceRTLs/common/src/support.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -1,4 +1,4 @@
-//===--------- support.cu - NVPTX OpenMP support functions ------- CUDA -*-===//
+//===--------- support.cu - OpenMP support functions ------------- CUDA -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
rename from openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
rename to openmp/libomptarget/deviceRTLs/common/src/sync.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
@@ -1,4 +1,4 @@
-//===------------ sync.h - NVPTX OpenMP synchronizations --------- CUDA -*-===//
+//===------------ sync.cu - OpenMP synchronizations -------------- CUDA -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -57,10 +57,10 @@
       src/libcall.cu
       ${devicertl_common_directory}/src/loop.cu
       ${devicertl_common_directory}/src/omptarget.cu
-      src/parallel.cu
+      ${devicertl_common_directory}/src/parallel.cu
       src/reduction.cu
-      src/support.cu
-      src/sync.cu
+      ${devicertl_common_directory}/src/support.cu
+      ${devicertl_common_directory}/src/sync.cu
       ${devicertl_common_directory}/src/task.cu
   )
 
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -97,7 +97,7 @@
 
   DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_begin\n");
 
-  // If the runtime has been elided, used __shared__ memory for master-worker
+  // If the runtime has been elided, used shared memory for master-worker
   // data sharing.
   if (!IsOMPRuntimeInitialized)
     return (void *)&DataSharingState;
@@ -300,7 +300,7 @@
                                           int16_t IsOMPRuntimeInitialized) {
   DSPRINT0(DSFLAG, "Entering __kmpc_get_data_sharing_environment_frame\n");
 
-  // If the runtime has been elided, use __shared__ memory for master-worker
+  // If the runtime has been elided, use shared memory for master-worker
   // data sharing.  We're reusing the statically allocated data structure
   // that is used for standard data sharing.
   if (!IsOMPRuntimeInitialized)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -17,27 +17,27 @@
 // global device environment
 ////////////////////////////////////////////////////////////////////////////////
 
-__device__ omptarget_device_environmentTy omptarget_device_environment;
+DEVICE omptarget_device_environmentTy omptarget_device_environment;
 
 ////////////////////////////////////////////////////////////////////////////////
 // global data holding OpenMP state information
 ////////////////////////////////////////////////////////////////////////////////
 
-__device__
+DEVICE
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
-__device__ omptarget_nvptx_SimpleMemoryManager
+DEVICE omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
-__device__ __shared__ uint32_t usedMemIdx;
-__device__ __shared__ uint32_t usedSlotIdx;
+DEVICE SHARED uint32_t usedMemIdx;
+DEVICE SHARED uint32_t usedSlotIdx;
 
-__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-__device__ __shared__ uint16_t threadLimit;
-__device__ __shared__ uint16_t threadsInTeam;
-__device__ __shared__ uint16_t nThreads;
+DEVICE SHARED uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+DEVICE SHARED uint16_t threadLimit;
+DEVICE SHARED uint16_t threadsInTeam;
+DEVICE SHARED uint16_t nThreads;
 // Pointer to this team's OpenMP state object
-__device__ __shared__
+DEVICE SHARED
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -45,24 +45,24 @@
 // communicate with the workers.  Since it is in shared memory, there is one
 // copy of these variables for each kernel, instance, and team.
 ////////////////////////////////////////////////////////////////////////////////
-volatile __device__ __shared__ omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
+volatile DEVICE SHARED omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
 
 ////////////////////////////////////////////////////////////////////////////////
 // OpenMP kernel execution parameters
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ uint32_t execution_param;
+DEVICE SHARED uint32_t execution_param;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Data sharing state
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ DataSharingStateTy DataSharingState;
+DEVICE SHARED DataSharingStateTy DataSharingState;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scratchpad for teams reduction.
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ void *ReductionScratchpadPtr;
+DEVICE SHARED void *ReductionScratchpadPtr;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Data sharing related variables.
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;
+DEVICE SHARED omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -233,7 +233,7 @@
                           : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
-  __shared__ volatile bool IsLastTeam;
+  SHARED volatile bool IsLastTeam;
 
   // Team masters of all teams write to the scratchpad.
   if (ThreadId == 0) {
@@ -403,8 +403,8 @@
   return (s & ~(unsigned)(WARPSIZE - 1));
 }
 
-__device__ static volatile uint32_t IterCnt = 0;
-__device__ static volatile uint32_t Cnt = 0;
+DEVICE static volatile uint32_t IterCnt = 0;
+DEVICE static volatile uint32_t Cnt = 0;
 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     kmp_Ident *loc, int32_t global_tid, void *global_buffer,
     int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
@@ -426,8 +426,8 @@
                          : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
-  __shared__ unsigned Bound;
-  __shared__ unsigned ChunkTeamCount;
+  SHARED unsigned Bound;
+  SHARED unsigned ChunkTeamCount;
 
   // Block progress for teams greater than the current upper
   // limit. We always only allow a number of teams less or equal
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -18,6 +18,8 @@
 #define DEVICE __device__
 #define INLINE __forceinline__ DEVICE
 #define NOINLINE __noinline__ DEVICE
+#define SHARED __shared__
+#define ALIGN(N) __align__(N)
 
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options