Index: compiler-rt/lib/hwasan/hwasan.h
===================================================================
--- compiler-rt/lib/hwasan/hwasan.h
+++ compiler-rt/lib/hwasan/hwasan.h
@@ -37,12 +37,8 @@
 const uptr kShadowAlignment = 1UL << kShadowScale;
 
 #define MEM_TO_SHADOW_OFFSET(mem) ((uptr)(mem) >> kShadowScale)
-#define MEM_TO_SHADOW(mem)         \
-  (((uptr)(mem) >> kShadowScale) + \
-   __hwasan_shadow_memory_dynamic_address_internal)
-#define SHADOW_TO_MEM(shadow)                                       \
-  (((uptr)(shadow)-__hwasan_shadow_memory_dynamic_address_internal) \
-   << kShadowScale)
+#define MEM_TO_SHADOW(mem) ((uptr)(mem) >> kShadowScale)
+#define SHADOW_TO_MEM(shadow) ((uptr)(shadow) << kShadowScale)
 
 #define MEM_IS_APP(mem) true
 
Index: compiler-rt/lib/hwasan/hwasan.cc
===================================================================
--- compiler-rt/lib/hwasan/hwasan.cc
+++ compiler-rt/lib/hwasan/hwasan.cc
@@ -238,10 +238,11 @@
   *p = x;
 }
 
+template<unsigned X>
 __attribute__((always_inline))
 static void SigIll() {
 #if defined(__aarch64__)
-  asm("hlt #0x1\n\t");
+  asm("hlt %0\n\t" ::"n"(X));
 #elif defined(__x86_64__) || defined(__i386__)
   asm("ud2\n\t");
 #else
@@ -251,15 +252,16 @@
   // __builtin_unreachable();
 }
 
+template<bool IsStore, unsigned LogSize>
 __attribute__((always_inline, nodebug))
 static void CheckAddress(uptr p) {
   tag_t ptr_tag = GetTagFromPointer(p);
   uptr ptr_raw = p & ~kAddressTagMask;
   tag_t mem_tag = *(tag_t *)MEM_TO_SHADOW(ptr_raw);
-  if (ptr_tag != mem_tag)
-    SigIll();
+  if (UNLIKELY(ptr_tag != mem_tag)) SigIll<0x100 + 0x10 * IsStore + LogSize>();
 }
 
+template<bool IsStore>
 __attribute__((always_inline, nodebug))
 static void CheckAddressSized(uptr p, uptr sz) {
   CHECK_NE(0, sz);
@@ -268,22 +270,22 @@
   tag_t *shadow_first = (tag_t *)MEM_TO_SHADOW(ptr_raw);
   tag_t *shadow_last = (tag_t *)MEM_TO_SHADOW(ptr_raw + sz - 1);
   for (tag_t *t = shadow_first; t <= shadow_last; ++t)
-    if (ptr_tag != *t) SigIll();
+    if (UNLIKELY(ptr_tag != *t)) SigIll<0x100 + 0x10 * IsStore + 0xf>();
 }
 
-void __hwasan_load(uptr p, uptr sz) { CheckAddressSized(p, sz); }
-void __hwasan_load1(uptr p) { CheckAddress(p); }
-void __hwasan_load2(uptr p) { CheckAddress(p); }
-void __hwasan_load4(uptr p) { CheckAddress(p); }
-void __hwasan_load8(uptr p) { CheckAddress(p); }
-void __hwasan_load16(uptr p) { CheckAddress(p); }
-
-void __hwasan_store(uptr p, uptr sz) { CheckAddressSized(p, sz); }
-void __hwasan_store1(uptr p) { CheckAddress(p); }
-void __hwasan_store2(uptr p) { CheckAddress(p); }
-void __hwasan_store4(uptr p) { CheckAddress(p); }
-void __hwasan_store8(uptr p) { CheckAddress(p); }
-void __hwasan_store16(uptr p) { CheckAddress(p); }
+void __hwasan_load(uptr p, uptr sz) { CheckAddressSized<false>(p, sz); }
+void __hwasan_load1(uptr p) { CheckAddress<false, 0>(p); }
+void __hwasan_load2(uptr p) { CheckAddress<false, 1>(p); }
+void __hwasan_load4(uptr p) { CheckAddress<false, 2>(p); }
+void __hwasan_load8(uptr p) { CheckAddress<false, 3>(p); }
+void __hwasan_load16(uptr p) { CheckAddress<false, 4>(p); }
+
+void __hwasan_store(uptr p, uptr sz) { CheckAddressSized<true>(p, sz); }
+void __hwasan_store1(uptr p) { CheckAddress<true, 0>(p); }
+void __hwasan_store2(uptr p) { CheckAddress<true, 1>(p); }
+void __hwasan_store4(uptr p) { CheckAddress<true, 2>(p); }
+void __hwasan_store8(uptr p) { CheckAddress<true, 3>(p); }
+void __hwasan_store16(uptr p) { CheckAddress<true, 4>(p); }
 
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 extern "C" {
Index: compiler-rt/lib/hwasan/hwasan_interface_internal.h
===================================================================
--- compiler-rt/lib/hwasan/hwasan_interface_internal.h
+++ compiler-rt/lib/hwasan/hwasan_interface_internal.h
@@ -31,14 +31,6 @@
 using __sanitizer::u16;
 using __sanitizer::u8;
 
-SANITIZER_INTERFACE_ATTRIBUTE
-extern uptr __hwasan_shadow_memory_dynamic_address;
-
-// Hidden alias for internal access.
-__attribute__((visibility("hidden")))
-extern uptr __hwasan_shadow_memory_dynamic_address_internal;
-
-
 SANITIZER_INTERFACE_ATTRIBUTE
 void __hwasan_load(uptr, uptr);
 SANITIZER_INTERFACE_ATTRIBUTE
Index: compiler-rt/lib/hwasan/hwasan_linux.cc
===================================================================
--- compiler-rt/lib/hwasan/hwasan_linux.cc
+++ compiler-rt/lib/hwasan/hwasan_linux.cc
@@ -32,18 +32,91 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
 
-uptr __hwasan_shadow_memory_dynamic_address;
+namespace __hwasan {
 
-__attribute__((alias("__hwasan_shadow_memory_dynamic_address")))
-extern uptr __hwasan_shadow_memory_dynamic_address_internal;
+void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
+  CHECK_EQ((beg % GetMmapGranularity()), 0);
+  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
+  uptr size = end - beg + 1;
+  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
+  void *res = MmapFixedNoReserve(beg, size, name);
+  if (res != (void *)beg) {
+    Report(
+        "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
+        "Perhaps you're using ulimit -v\n",
+        size);
+    Abort();
+  }
+  if (common_flags()->no_huge_pages_for_shadow) NoHugePagesInRegion(beg, size);
+  if (common_flags()->use_madv_dontdump) DontDumpShadowMemory(beg, size);
+}
 
-namespace __hwasan {
+// Reserves the gap via MmapFixedNoAccess; reports and dies on failure.
+static void ProtectGap(uptr addr, uptr size) {
+  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
+  if (addr == (uptr)res) return;
+  // A few pages at the start of the address space can not be protected.
+  // But we really want to protect as much as possible, to prevent this memory
+  // being returned as a result of a non-FIXED mmap().
+  if (addr == 0) {
+    uptr step = GetMmapGranularity();
+    while (size > step) {
+      addr += step;
+      size -= step;
+      res = MmapFixedNoAccess(addr, size, "shadow gap");
+      if (addr == (uptr)res) return;
+    }
+  }
+
+  Report("ERROR: Failed to protect the shadow gap. "
+         "HWASan cannot proceed correctly. ABORTING.\n");
+  DumpProcessMap();
+  Die();
+}
 
 bool InitShadow() {
   const uptr maxVirtualAddress = GetMaxUserVirtualAddress();
-  uptr shadow_size = MEM_TO_SHADOW_OFFSET(maxVirtualAddress) + 1;
-  __hwasan_shadow_memory_dynamic_address =
-      reinterpret_cast<uptr>(MmapNoReserveOrDie(shadow_size, "shadow"));
+
+  // LowMem covers as much of the first 4GB as possible.
+  const uptr kLowMemEnd = 1UL << 32;
+  const uptr kLowShadowEnd = kLowMemEnd >> kShadowScale;
+  const uptr kLowShadowStart = kLowShadowEnd >> kShadowScale;
+
+  // HighMem covers the upper part of the address space.
+  const uptr kHighShadowEnd = (maxVirtualAddress >> kShadowScale) + 1;
+  const uptr kHighShadowStart = Max(kLowMemEnd, kHighShadowEnd >> kShadowScale);
+  CHECK(kHighShadowStart < kHighShadowEnd);
+
+  const uptr kHighMemStart = kHighShadowStart << kShadowScale;
+  CHECK(kHighShadowEnd <= kHighMemStart);
+
+  if (Verbosity()) {
+    Printf("|| `[%p, %p]` || HighMem    ||\n", (void *)kHighMemStart,
+           (void *)maxVirtualAddress);
+    if (kHighMemStart > kHighShadowEnd)
+      Printf("|| `[%p, %p]` || ShadowGap3 ||\n", (void *)kHighShadowEnd,
+             (void *)kHighMemStart);
+    Printf("|| `[%p, %p]` || HighShadow ||\n", (void *)kHighShadowStart,
+           (void *)kHighShadowEnd);
+    if (kHighShadowStart > kLowMemEnd)
+      Printf("|| `[%p, %p]` || ShadowGap2 ||\n", (void *)kLowMemEnd,
+             (void *)kHighShadowStart);
+    Printf("|| `[%p, %p]` || LowMem     ||\n", (void *)kLowShadowEnd,
+           (void *)kLowMemEnd);
+    Printf("|| `[%p, %p]` || LowShadow  ||\n", (void *)kLowShadowStart,
+           (void *)kLowShadowEnd);
+    Printf("|| `[%p, %p]` || ShadowGap1 ||\n", (void *)0,
+           (void *)kLowShadowStart);
+  }
+  // Map both shadow regions, then protect the gaps between the regions.
+  ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd - 1, "low shadow");
+  ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd - 1, "high shadow");
+  ProtectGap(0, kLowShadowStart);
+  if (kHighShadowStart > kLowMemEnd)
+    ProtectGap(kLowMemEnd, kHighShadowStart - kLowMemEnd);
+  if (kHighMemStart > kHighShadowEnd)
+    ProtectGap(kHighShadowEnd, kHighMemStart - kHighShadowEnd);
+
   return true;
 }
 
@@ -105,45 +178,28 @@
 
 #if defined(__aarch64__)
 static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
+  // Access type is encoded in HLT immediate as 0x1XY,
+  // where X is 1 for store, 0 for load.
+  // Valid values of Y are 0 to 4, which are interpreted as log2(access_size),
+  // and 0xF, which means that access size is stored in X1 register.
+  // Access address is always in X0 register.
   AccessInfo ai;
   uptr pc = (uptr)info->si_addr;
-
-  struct {
-    uptr addr;
-    unsigned size;
-    bool is_store;
-  } handlers[] = {
-      {(uptr)&__hwasan_load1, 1, false},   {(uptr)&__hwasan_load2, 2, false},
-      {(uptr)&__hwasan_load4, 4, false},   {(uptr)&__hwasan_load8, 8, false},
-      {(uptr)&__hwasan_load16, 16, false},  {(uptr)&__hwasan_load, 0, false},
-      {(uptr)&__hwasan_store1, 1, true},  {(uptr)&__hwasan_store2, 2, true},
-      {(uptr)&__hwasan_store4, 4, true},  {(uptr)&__hwasan_store8, 8, true},
-      {(uptr)&__hwasan_store16, 16, true}, {(uptr)&__hwasan_store, 0, true}};
-  int best = -1;
-  uptr best_distance = 0;
-  for (size_t i = 0; i < sizeof(handlers) / sizeof(handlers[0]); ++i) {
-    uptr handler = handlers[i].addr;
-    // Don't accept pc == handler: HLT is never the first instruction.
-    if (pc <= handler) continue;
-    uptr distance = pc - handler;
-    if (distance > 256) continue;
-    if (best == -1 || best_distance > distance) {
-      best = i;
-      best_distance = distance;
-    }
-  }
-
-  // Not ours.
-  if (best == -1)
-    return AccessInfo{0, 0, false, false};
-
-  ai.is_store = handlers[best].is_store;
-  ai.is_load = !handlers[best].is_store;
-  ai.size = handlers[best].size;
-
+  unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;  // HLT imm16: bits [20:5].
+  if ((code & 0xff00) != 0x100)
+    return AccessInfo{0, 0, false, false}; // Not ours.
+  bool is_store = code & 0x10;
+  unsigned size_log = code & 0xf;  // Y nibble only; bit 0x10 is the store flag.
+  if (size_log > 4 && size_log != 0xf)
+    return AccessInfo{0, 0, false, false}; // Not ours.
+
+  ai.is_store = is_store;
+  ai.is_load = !is_store;
   ai.addr = uc->uc_mcontext.regs[0];
-  if (ai.size == 0)
+  if (size_log == 0xf)
     ai.size = uc->uc_mcontext.regs[1];
+  else
+    ai.size = 1U << size_log;
   return ai;
 }
 #else
@@ -152,11 +208,11 @@
 }
 #endif
 
-static void HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
+static bool HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
   SignalContext sig{info, uc};
   AccessInfo ai = GetAccessInfo(info, uc);
   if (!ai.is_store && !ai.is_load)
-    return;
+    return false;
 
   InternalScopedBuffer<BufferedStackTrace> stack_buffer(1);
   BufferedStackTrace *stack = stack_buffer.data();
@@ -169,8 +225,9 @@
   ++hwasan_report_count;
   if (flags()->halt_on_error)
     Die();
-  else
-    uc->uc_mcontext.pc += 4;
+
+  uc->uc_mcontext.pc += 4;
+  return true;
 }
 
 static void OnStackUnwind(const SignalContext &sig, const void *,
@@ -181,11 +238,11 @@
 
 void HwasanOnDeadlySignal(int signo, void *info, void *context) {
   // Probably a tag mismatch.
-  // FIXME: detect pc range in __hwasan_load* or __hwasan_store*.
   if (signo == SIGILL)
-    HwasanOnSIGILL(signo, (siginfo_t *)info, (ucontext_t*)context);
-  else
-    HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr);
+    if (HwasanOnSIGILL(signo, (siginfo_t *)info, (ucontext_t*)context))
+      return;
+
+  HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr);
 }
 
 
Index: compiler-rt/test/hwasan/TestCases/halt-on-error.cc
===================================================================
--- compiler-rt/test/hwasan/TestCases/halt-on-error.cc
+++ compiler-rt/test/hwasan/TestCases/halt-on-error.cc
@@ -11,19 +11,16 @@
   __hwasan_disable_allocator_tagging();
   return x[2] + ((char *)x)[6] + ((char *)x)[9];
   // CHECK: READ of size 4 at
-  // CHECK: #0 {{.*}} in __hwasan_load4 {{.*}}hwasan.cc
-  // CHECK: #1 {{.*}} in main {{.*}}halt-on-error.cc:12
-  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in __hwasan_load4
+  // CHECK: #0 {{.*}} in main {{.*}}halt-on-error.cc:12
+  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main
 
   // CHECK: READ of size 1 at
-  // CHECK: #0 {{.*}} in __hwasan_load1 {{.*}}hwasan.cc
-  // CHECK: #1 {{.*}} in main {{.*}}halt-on-error.cc:12
-  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in __hwasan_load1
+  // CHECK: #0 {{.*}} in main {{.*}}halt-on-error.cc:12
+  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main
 
   // CHECK: READ of size 1 at
-  // CHECK: #0 {{.*}} in __hwasan_load1 {{.*}}hwasan.cc
-  // CHECK: #1 {{.*}} in main {{.*}}halt-on-error.cc:12
-  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in __hwasan_load1
+  // CHECK: #0 {{.*}} in main {{.*}}halt-on-error.cc:12
+  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main
 
   // CHECK-NOT: tag-mismatch
 }
Index: compiler-rt/test/hwasan/TestCases/use-after-free.cc
===================================================================
--- compiler-rt/test/hwasan/TestCases/use-after-free.cc
+++ compiler-rt/test/hwasan/TestCases/use-after-free.cc
@@ -14,8 +14,7 @@
   __hwasan_disable_allocator_tagging();
   return x[5];
   // CHECK: READ of size 1 at
-  // CHECK: #0 {{.*}} in __hwasan_load1 {{.*}}hwasan.cc
-  // CHECK: #1 {{.*}} in main {{.*}}use-after-free.cc:15
+  // CHECK: #0 {{.*}} in main {{.*}}use-after-free.cc:15
 
   // CHECK: freed here:
   // CHECK: #0 {{.*}} in free {{.*}}hwasan_interceptors.cc
@@ -25,5 +24,5 @@
   // CHECK: #0 {{.*}} in __interceptor_malloc {{.*}}hwasan_interceptors.cc
   // CHECK: #1 {{.*}} in main {{.*}}use-after-free.cc:12
 
-  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in __hwasan_load1
+  // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main
 }
Index: llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -22,7 +22,10 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstVisitor.h"
@@ -51,11 +54,19 @@
 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
 static const size_t kNumberOfAccessSizes = 5;
 
+static const size_t kShadowScale = 4;
+static const unsigned kPointerTagShift = 56;
+
 static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
     "hwasan-memory-access-callback-prefix",
     cl::desc("Prefix for memory access callbacks"), cl::Hidden,
     cl::init("__hwasan_"));
 
+static cl::opt<bool>
+    ClInstrumentWithCalls("hwasan-instrument-with-calls",
+                cl::desc("instrument reads and writes with callbacks"),
+                cl::Hidden, cl::init(false));
+
 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                        cl::desc("instrument read instructions"),
                                        cl::Hidden, cl::init(true));
@@ -86,6 +97,9 @@
   bool doInitialization(Module &M) override;
 
   void initializeCallbacks(Module &M);
+  void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+                                 unsigned AccessSizeIndex,
+                                 Instruction *InsertBefore);
   bool instrumentMemAccess(Instruction *I);
   Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
                                    uint64_t *TypeSize, unsigned *Alignment,
@@ -219,6 +233,31 @@
   return Res;
 }
 
+void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+                                                   unsigned AccessSizeIndex,
+                                                   Instruction *InsertBefore) {
+  IRBuilder<> IRB(InsertBefore);
+  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift), IRB.getInt8Ty());
+  Value *AddrLong =
+      IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
+                                              ~(0xFFULL << kPointerTagShift)));
+  Value *ShadowLong = IRB.CreateLShr(AddrLong, kShadowScale);
+  Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, IRB.getInt8PtrTy()));
+  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
+
+  TerminatorInst *CheckTerm =
+      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+
+  IRB.SetInsertPoint(CheckTerm);
+  // AArch64 only: the runtime's SIGILL handler reads the address from x0.
+  InlineAsm *Asm = InlineAsm::get(
+      FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+      "hlt #" + itostr(0x100 + IsWrite * 0x10 + AccessSizeIndex), "{x0}",
+      /*hasSideEffects=*/true);
+  IRB.CreateCall(Asm, PtrLong);
+}
+
 bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
   DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
   bool IsWrite = false;
@@ -237,10 +276,16 @@
   IRBuilder<> IRB(I);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (isPowerOf2_64(TypeSize) &&
-      (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1)))) {
+      (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
+      (Alignment >= (1UL << kShadowScale) || Alignment == 0 ||
+       Alignment >= TypeSize / 8)) {
     size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
-    IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
-                   AddrLong);
+    if (ClInstrumentWithCalls) {
+      IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+                     AddrLong);
+    } else {
+      instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I);
+    }
   } else {
     IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
                    {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
Index: llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
===================================================================
--- llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
+++ llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
@@ -8,9 +8,20 @@
 define i8 @test_load8(i8* %a) sanitize_hwaddress {
 ; CHECK-LABEL: @test_load8(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i8* %a to i64
-; CHECK: call void @__hwasan_load1(i64 %[[A]])
-; CHECK: %[[B:[^ ]*]] = load i8, i8* %a
-; CHECK: ret i8 %[[B]]
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #256", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: %[[G:[^ ]*]] = load i8, i8* %a, align 4
+; CHECK: ret i8 %[[G]]
 
 entry:
   %b = load i8, i8* %a, align 4
@@ -20,9 +31,20 @@
 define i16 @test_load16(i16* %a) sanitize_hwaddress {
 ; CHECK-LABEL: @test_load16(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i16* %a to i64
-; CHECK: call void @__hwasan_load2(i64 %[[A]])
-; CHECK: %[[B:[^ ]*]] = load i16, i16* %a
-; CHECK: ret i16 %[[B]]
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #257", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: %[[G:[^ ]*]] = load i16, i16* %a, align 4
+; CHECK: ret i16 %[[G]]
 
 entry:
   %b = load i16, i16* %a, align 4
@@ -32,9 +54,20 @@
 define i32 @test_load32(i32* %a) sanitize_hwaddress {
 ; CHECK-LABEL: @test_load32(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i32* %a to i64
-; CHECK: call void @__hwasan_load4(i64 %[[A]])
-; CHECK: %[[B:[^ ]*]] = load i32, i32* %a
-; CHECK: ret i32 %[[B]]
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #258", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: %[[G:[^ ]*]] = load i32, i32* %a, align 4
+; CHECK: ret i32 %[[G]]
 
 entry:
   %b = load i32, i32* %a, align 4
@@ -44,9 +77,20 @@
 define i64 @test_load64(i64* %a) sanitize_hwaddress {
 ; CHECK-LABEL: @test_load64(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i64* %a to i64
-; CHECK: call void @__hwasan_load8(i64 %[[A]])
-; CHECK: %[[B:[^ ]*]] = load i64, i64* %a
-; CHECK: ret i64 %[[B]]
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #259", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: %[[G:[^ ]*]] = load i64, i64* %a, align 8
+; CHECK: ret i64 %[[G]]
 
 entry:
   %b = load i64, i64* %a, align 8
@@ -56,9 +100,20 @@
 define i128 @test_load128(i128* %a) sanitize_hwaddress {
 ; CHECK-LABEL: @test_load128(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i128* %a to i64
-; CHECK: call void @__hwasan_load16(i64 %[[A]])
-; CHECK: %[[B:[^ ]*]] = load i128, i128* %a
-; CHECK: ret i128 %[[B]]
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #260", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: %[[G:[^ ]*]] = load i128, i128* %a, align 16
+; CHECK: ret i128 %[[G]]
 
 entry:
   %b = load i128, i128* %a, align 16
@@ -80,8 +135,19 @@
 define void @test_store8(i8* %a, i8 %b) sanitize_hwaddress {
 ; CHECK-LABEL: @test_store8(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i8* %a to i64
-; CHECK: call void @__hwasan_store1(i64 %[[A]])
-; CHECK: store i8 %b, i8* %a
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #272", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: store i8 %b, i8* %a, align 4
 ; CHECK: ret void
 
 entry:
@@ -92,8 +158,19 @@
 define void @test_store16(i16* %a, i16 %b) sanitize_hwaddress {
 ; CHECK-LABEL: @test_store16(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i16* %a to i64
-; CHECK: call void @__hwasan_store2(i64 %[[A]])
-; CHECK: store i16 %b, i16* %a
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #273", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: store i16 %b, i16* %a, align 4
 ; CHECK: ret void
 
 entry:
@@ -104,8 +181,19 @@
 define void @test_store32(i32* %a, i32 %b) sanitize_hwaddress {
 ; CHECK-LABEL: @test_store32(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i32* %a to i64
-; CHECK: call void @__hwasan_store4(i64 %[[A]])
-; CHECK: store i32 %b, i32* %a
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #274", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: store i32 %b, i32* %a, align 4
 ; CHECK: ret void
 
 entry:
@@ -116,24 +204,46 @@
 define void @test_store64(i64* %a, i64 %b) sanitize_hwaddress {
 ; CHECK-LABEL: @test_store64(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i64* %a to i64
-; CHECK: call void @__hwasan_store8(i64 %[[A]])
-; CHECK: store i64 %b, i64* %a
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #275", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: store i64 %b, i64* %a, align 8
 ; CHECK: ret void
 
 entry:
-  store i64 %b, i64* %a, align 4
+  store i64 %b, i64* %a, align 8
   ret void
 }
 
 define void @test_store128(i128* %a, i128 %b) sanitize_hwaddress {
 ; CHECK-LABEL: @test_store128(
 ; CHECK: %[[A:[^ ]*]] = ptrtoint i128* %a to i64
-; CHECK: call void @__hwasan_store16(i64 %[[A]])
-; CHECK: store i128 %b, i128* %a
+; CHECK: %[[B:[^ ]*]] = lshr i64 %[[A]], 56
+; CHECK: %[[PTRTAG:[^ ]*]] = trunc i64 %[[B]] to i8
+; CHECK: %[[C:[^ ]*]] = and i64 %[[A]], 72057594037927935
+; CHECK: %[[D:[^ ]*]] = lshr i64 %[[C]], 4
+; CHECK: %[[E:[^ ]*]] = inttoptr i64 %[[D]] to i8*
+; CHECK: %[[MEMTAG:[^ ]*]] = load i8, i8* %[[E]]
+; CHECK: %[[F:[^ ]*]] = icmp ne i8 %[[PTRTAG]], %[[MEMTAG]]
+; CHECK: br i1 %[[F]], label {{.*}}, label {{.*}}, !prof {{.*}}
+
+; CHECK: call void asm sideeffect "hlt #276", "{x0}"(i64 %[[A]])
+; CHECK: br label
+
+; CHECK: store i128 %b, i128* %a, align 16
 ; CHECK: ret void
 
 entry:
-  store i128 %b, i128* %a, align 4
+  store i128 %b, i128* %a, align 16
   ret void
 }
 
@@ -149,6 +259,18 @@
   ret void
 }
 
+define void @test_store_unaligned(i64* %a, i64 %b) sanitize_hwaddress {
+; CHECK-LABEL: @test_store_unaligned(
+; CHECK: %[[A:[^ ]*]] = ptrtoint i64* %a to i64
+; CHECK: call void @__hwasan_store(i64 %[[A]], i64 8)
+; CHECK: store i64 %b, i64* %a, align 4
+; CHECK: ret void
+
+entry:
+  store i64 %b, i64* %a, align 4
+  ret void
+}
+
 define i8 @test_load_noattr(i8* %a) {
 ; CHECK-LABEL: @test_load_noattr(
 ; CHECK-NEXT: entry:
Index: llvm/test/Instrumentation/HWAddressSanitizer/with-calls.ll
===================================================================
--- llvm/test/Instrumentation/HWAddressSanitizer/with-calls.ll
+++ llvm/test/Instrumentation/HWAddressSanitizer/with-calls.ll
@@ -1,6 +1,6 @@
 ; Test basic address sanitizer instrumentation.
 ;
-; RUN: opt < %s -hwasan -S | FileCheck %s
+; RUN: opt < %s -hwasan -hwasan-instrument-with-calls -S | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android"