Index: llvm/lib/CodeGen/BranchRelaxation.cpp
===================================================================
--- llvm/lib/CodeGen/BranchRelaxation.cpp
+++ llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -507,25 +507,31 @@
       Next = std::next(J);
       MachineInstr &MI = *J;
 
-      if (MI.isConditionalBranch()) {
-        MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
-        if (!isBlockInRange(MI, *DestBB)) {
-          if (Next != MBB.end() && Next->isConditionalBranch()) {
-            // If there are multiple conditional branches, this isn't an
-            // analyzable block. Split later terminators into a new block so
-            // each one will be analyzable.
-
-            splitBlockBeforeInstr(*Next, DestBB);
-          } else {
-            fixupConditionalBranch(MI);
-            ++NumConditionalRelaxed;
-          }
+      if (!MI.isConditionalBranch())
+        continue;
+
+      if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+        // FAULTING_OP's destination is not encoded in the instruction stream
+        // and thus never needs to be relaxed.
+        continue;
+
+      MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+      if (!isBlockInRange(MI, *DestBB)) {
+        if (Next != MBB.end() && Next->isConditionalBranch()) {
+          // If there are multiple conditional branches, this isn't an
+          // analyzable block. Split later terminators into a new block so
+          // each one will be analyzable.
+
+          splitBlockBeforeInstr(*Next, DestBB);
+        } else {
+          fixupConditionalBranch(MI);
+          ++NumConditionalRelaxed;
+        }
 
-          Changed = true;
+        Changed = true;
 
-          // This may have modified all of the terminators, so start over.
-          Next = MBB.getFirstTerminator();
-        }
+        // This may have modified all of the terminators, so start over.
+        Next = MBB.getFirstTerminator();
       }
     }
   }
Index: llvm/lib/CodeGen/ImplicitNullChecks.cpp
===================================================================
--- llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -373,6 +373,10 @@
   bool OffsetIsScalable;
   const MachineOperand *BaseOp;
 
+  // Implementation restriction for faulting_op insertion.
+  // TODO: This could be relaxed if we find a test case which warrants it.
+  if (MI.getDesc().getNumDefs() > 1)
+    return SR_Unsuitable;
   // FIXME: This handles only simple addressing mode.
   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
                                     TRI))
@@ -502,9 +506,9 @@
       MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
     return false;
 
-  // If we cannot erase the test instruction itself, then making the null check
-  // implicit does not buy us much.
-  if (!MBP.SingleUseCondition)
+  // If there is a separate condition-generating instruction, we choose not
+  // to transform unless we can remove both condition and consuming branch.
+  if (MBP.ConditionDef && !MBP.SingleUseCondition)
     return false;
 
   MachineBasicBlock *NotNullSucc, *NullSucc;
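Reviewer note (not part of the patch): the suitability restriction above exists because the FAULTING_OP pseudo only has room for a single def. For readers new to the pass, the pseudo is built roughly as in the sketch below, modeled on insertFaultingInstr() in ImplicitNullChecks.cpp — that helper name is real, but this body is a trimmed illustration that assumes the file's usual includes and hard-codes FaultingLoad (the real pass picks the fault kind from the memory operation):

  static MachineInstr *insertFaultingOpSketch(MachineInstr *MI,
                                              MachineBasicBlock *MBB,
                                              MachineBasicBlock *HandlerMBB,
                                              const TargetInstrInfo *TII) {
    const unsigned NoRegister = 0; // FAULTING_OP may have no def at all
    DebugLoc DL;
    unsigned NumDefs = MI->getDesc().getNumDefs();
    assert(NumDefs <= 1 && "multi-def memory ops were rejected as SR_Unsuitable");

    unsigned DefReg = NoRegister;
    if (NumDefs != 0)
      DefReg = MI->getOperand(0).getReg();

    // Operand layout consumed later by the AsmPrinter:
    //   <def>, <fault kind>, <handler MBB>, <wrapped opcode>, <operands...>
    auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
                   .addImm(FaultMaps::FaultingLoad)
                   .addMBB(HandlerMBB)
                   .addImm(MI->getOpcode());
    for (const MachineOperand &MO : MI->uses())
      MIB.add(MO);
    MIB.setMemRefs(MI->memoperands());
    return MIB;
  }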
@@ -522,32 +526,34 @@
   if (NotNullSucc->pred_size() != 1)
     return false;
 
-  // To prevent the invalid transformation of the following code:
-  //
-  //   mov %rax, %rcx
-  //   test %rax, %rax
-  //   %rax = ...
-  //   je throw_npe
-  //   mov(%rcx), %r9
-  //   mov(%rax), %r10
-  //
-  // into:
-  //
-  //   mov %rax, %rcx
-  //   %rax = ....
-  //   faulting_load_op("movl (%rax), %r10", throw_npe)
-  //   mov(%rcx), %r9
-  //
-  // we must ensure that there are no instructions between the 'test' and
-  // conditional jump that modify %rax.
   const Register PointerReg = MBP.LHS.getReg();
-  assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
-
-  for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
-    if (I->modifiesRegister(PointerReg, TRI))
-      return false;
+  if (MBP.ConditionDef) {
+    // To prevent the invalid transformation of the following code:
+    //
+    //   mov %rax, %rcx
+    //   test %rax, %rax
+    //   %rax = ...
+    //   je throw_npe
+    //   mov(%rcx), %r9
+    //   mov(%rax), %r10
+    //
+    // into:
+    //
+    //   mov %rax, %rcx
+    //   %rax = ....
+    //   faulting_load_op("movl (%rax), %r10", throw_npe)
+    //   mov(%rcx), %r9
+    //
+    // we must ensure that there are no instructions between the 'test' and
+    // conditional jump that modify %rax.
+    assert(MBP.ConditionDef->getParent() == &MBB &&
+           "Should be in basic block");
+
+    for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+      if (I->modifiesRegister(PointerReg, TRI))
+        return false;
+  }
 
   // Starting with a code fragment like:
   //
   //   test %rax, %rax
@@ -725,10 +731,13 @@
       }
     }
 
     NC.getMemOperation()->eraseFromParent();
-    NC.getCheckOperation()->eraseFromParent();
+    if (auto *CheckOp = NC.getCheckOperation())
+      CheckOp->eraseFromParent();
 
-    // Insert an *unconditional* branch to not-null successor.
+    // Insert an *unconditional* branch to the not-null successor - we expect
+    // block placement to remove fallthroughs later.
     TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
                       /*Cond=*/None, DL);
 
Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,6 +32,7 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -69,12 +70,13 @@
 class AArch64AsmPrinter : public AsmPrinter {
   AArch64MCInstLower MCInstLowering;
   StackMaps SM;
+  FaultMaps FM;
   const AArch64Subtarget *STI;
 
 public:
   AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
       : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
-        SM(*this) {}
+        SM(*this), FM(*this) {}
 
   StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
@@ -97,6 +99,7 @@
                      const MachineInstr &MI);
   void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
                        const MachineInstr &MI);
+  void LowerFAULTING_OP(const MachineInstr &MI);
 
   void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
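Reviewer note (not part of the patch): serializeToFaultMapSection(), wired up in the next hunk, is what turns the recorded faulting ops into the __llvm_faultmaps section pairing each faulting PC with its handler PC. For context, the runtime side of the contract looks roughly like the sketch below; lookupFaultMapHandler() is a hypothetical stand-in for the embedding VM's fault-map lookup, and the ucontext field shown assumes the AArch64 Linux layout:

  #include <csignal>
  #include <cstdint>
  #include <ucontext.h>

  // Hypothetical: resolves a faulting PC via a parsed __llvm_faultmaps section.
  extern uintptr_t lookupFaultMapHandler(uintptr_t FaultingPC);

  static void SegvHandler(int Sig, siginfo_t *, void *RawCtx) {
    auto *UC = static_cast<ucontext_t *>(RawCtx);
    uintptr_t PC = UC->uc_mcontext.pc;  // PC of the trapping FAULTING_OP
    if (uintptr_t Handler = lookupFaultMapHandler(PC))
      UC->uc_mcontext.pc = Handler;     // resume in the explicit is_null block
    else
      std::signal(Sig, SIG_DFL);        // not an implicit null check; re-raise
  }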
@@ -541,7 +544,11 @@
     // generates code that does this, it is always safe to set.
     OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
   }
+
+  // Emit stack and fault map information.
   emitStackMaps(SM);
+  FM.serializeToFaultMapSection();
+
 }
 
 void AArch64AsmPrinter::EmitLOHs() {
@@ -987,6 +994,42 @@
   SM.recordStatepoint(*MILabel, MI);
 }
 
+void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
+  // FAULTING_OP <def>, <fault kind>, <handler MBB>, <opcode>,
+  //             <operands...>
+
+  Register DefRegister = FaultingMI.getOperand(0).getReg();
+  FaultMaps::FaultKind FK =
+      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+  unsigned Opcode = FaultingMI.getOperand(3).getImm();
+  unsigned OperandsBeginIdx = 4;
+
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+  OutStreamer->emitLabel(FaultingLabel);
+
+  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
+
+  MCInst MI;
+  MI.setOpcode(Opcode);
+
+  if (DefRegister != (Register)0)
+    MI.addOperand(MCOperand::createReg(DefRegister));
+
+  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+            E = FaultingMI.operands_end();
+       I != E; ++I) {
+    MCOperand Dest;
+    lowerOperand(*I, Dest);
+    MI.addOperand(Dest);
+  }
+
+  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
+  OutStreamer->emitInstruction(MI, getSubtargetInfo());
+}
+
 void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
   Register DestReg = MI.getOperand(0).getReg();
   if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
@@ -1271,6 +1314,9 @@
   case TargetOpcode::STATEPOINT:
     return LowerSTATEPOINT(*OutStreamer, SM, *MI);
 
+  case TargetOpcode::FAULTING_OP:
+    return LowerFAULTING_OP(*MI);
+
   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
     LowerPATCHABLE_FUNCTION_ENTER(*MI);
     return;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -188,6 +188,9 @@
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
+  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
+                              MachineBranchPredicate &MBP,
+                              bool AllowModify) const override;
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
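Reviewer note (not part of the patch): like analyzeBranch(), the new hook follows the TargetInstrInfo convention of returning true on *failure* and false once MBP has been filled in. A hedged sketch of how a client such as ImplicitNullChecks can consume it; isNullCheckShapedBlock() is an illustrative name, not a function added by this patch:

  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  static bool isNullCheckShapedBlock(const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB) {
    MachineBranchPredicate MBP;
    if (TII.analyzeBranchPredicate(MBB, MBP, /*AllowModify=*/false))
      return false; // not analyzable; leave the block alone
    // cb(n)z compares a register directly against zero, so on AArch64 a null
    // check surfaces as reg ==/!= 0 with no separate flag-setting instruction
    // (hence ConditionDef == nullptr below).
    return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
           MBP.ConditionDef == nullptr;
  }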
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -328,6 +328,58 @@
   return true;
 }
 
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+                                              MachineBranchPredicate &MBP,
+                                              bool AllowModify) const {
+  // For the moment, handle only a block which ends with a cb(n)zx followed by
+  // a fallthrough. Why this? Because it is a common form.
+  // TODO: Should we handle b.cc?
+
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return true;
+
+  // Skip over SpeculationBarrierEndBB terminators.
+  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+    --I;
+  }
+
+  if (!isUnpredicatedTerminator(*I))
+    return true;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+  if (!isCondBranchOpcode(LastOpc))
+    return true;
+
+  switch (LastOpc) {
+  default:
+    return true;
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+    break;
+  }
+
+  MBP.TrueDest = LastInst->getOperand(1).getMBB();
+  assert(MBP.TrueDest && "expected!");
+  MBP.FalseDest = MBB.getNextNode();
+
+  MBP.ConditionDef = nullptr;
+  MBP.SingleUseCondition = false;
+
+  MBP.LHS = LastInst->getOperand(0);
+  MBP.RHS = MachineOperand::CreateImm(0);
+  MBP.Predicate = (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX)
+                      ? MachineBranchPredicate::PRED_NE
+                      : MachineBranchPredicate::PRED_EQ;
+  return false;
+}
+
 bool AArch64InstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
   if (Cond[0].getImm() != -1) {
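Reviewer note (not part of the patch): the opcode-to-predicate mapping computed at the end of the new hook, pulled out as a standalone helper for clarity — both cbnz widths map to PRED_NE and both cbz widths to PRED_EQ (the helper name is illustrative):

  static TargetInstrInfo::MachineBranchPredicate::ComparePredicate
  predicateForCompareAndBranch(unsigned Opc) {
    using MBP = TargetInstrInfo::MachineBranchPredicate;
    switch (Opc) {
    case AArch64::CBZW:  // cbz wN, <dest>: branch taken when wN == 0
    case AArch64::CBZX:  // cbz xN, <dest>: branch taken when xN == 0
      return MBP::PRED_EQ;
    case AArch64::CBNZW: // cbnz wN, <dest>: branch taken when wN != 0
    case AArch64::CBNZX: // cbnz xN, <dest>: branch taken when xN != 0
      return MBP::PRED_NE;
    default:
      return MBP::PRED_INVALID;
    }
  }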
Index: llvm/test/CodeGen/AArch64/implicit-null-check.ll
===================================================================
--- llvm/test/CodeGen/AArch64/implicit-null-check.ll
+++ llvm/test/CodeGen/AArch64/implicit-null-check.ll
@@ -5,15 +5,14 @@
 ; file with the same name in the X86 tree, but adjusted to remove patterns
 ; related to memory folding of arithmetic (since aarch64 doesn't), and add
 ; a couple of aarch64 specific tests.
-; NOTE: Currently negative tests as these are being precommitted before
-; the changes to enable.
 
 define i32 @imp_null_check_load_fallthrough(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fallthrough:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB0_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB0_2
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -34,9 +33,10 @@
 define i32 @imp_null_check_load_reorder(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_reorder:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB1_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB1_2
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -56,9 +56,10 @@
 define i32 @imp_null_check_unordered_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_unordered_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB2_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB2_2
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -76,6 +77,8 @@
 }
 
+; TODO: Could probably be converted into an implicit check, but we're
+; conservative about the seq_cst load for now.
 define i32 @imp_null_check_seq_cst_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_seq_cst_load:
 ; CHECK:       // %bb.0: // %entry
@@ -125,9 +128,10 @@
 define i8 @imp_null_check_load_i8(i8* %x) {
 ; CHECK-LABEL: imp_null_check_load_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB5_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldrb w0, [x0]
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    ldrb w0, [x0] // on-fault: .LBB5_2
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB5_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -176,9 +180,10 @@
 define i32 @imp_null_check_gep_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_gep_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB7_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0, #128]
+; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:    ldr w0, [x0, #128] // on-fault: .LBB7_2
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB7_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -199,9 +204,10 @@
 define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
 ; CHECK-LABEL: imp_null_check_add_result:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB8_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB8_2
+; CHECK-NEXT:    b .LBB8_1
+; CHECK-NEXT:  .LBB8_1: // %not_null
 ; CHECK-NEXT:    add w0, w8, w1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB8_2: // %is_null
@@ -225,9 +231,10 @@
 define i32 @imp_null_check_hoist_over_udiv(i32* %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: imp_null_check_hoist_over_udiv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB9_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp6:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB9_2
+; CHECK-NEXT:    b .LBB9_1
+; CHECK-NEXT:  .LBB9_1: // %not_null
 ; CHECK-NEXT:    udiv w9, w1, w2
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -249,6 +256,8 @@
 }
 
+; TODO: We should be able to hoist this - we can on x86; why isn't it
+; working for aarch64? Aliasing?
 define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z) {
 ; CHECK-LABEL: imp_null_check_hoist_over_unrelated_load:
 ; CHECK:       // %bb.0: // %entry
@@ -278,9 +287,10 @@
 define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) {
 ; CHECK-LABEL: imp_null_check_gep_load_with_use_dep:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB11_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp7:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB11_2
+; CHECK-NEXT:    b .LBB11_1
+; CHECK-NEXT:  .LBB11_1: // %not_null
 ; CHECK-NEXT:    add w9, w0, w1
 ; CHECK-NEXT:    add w8, w9, w8
 ; CHECK-NEXT:    add w0, w8, #4 // =4
@@ -304,6 +314,8 @@
   ret i32 %z
 }
 
+;; TODO: We could handle this case, since we could hoist the fence into the
+;; previous block, before the conditional, without changing behavior.
 define i32 @imp_null_check_load_fence1(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fence1:
 ; CHECK:       // %bb.0: // %entry
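Reviewer note (not part of the patch): the extra "b .LBBn_1" in the updated CHECK lines is expected. The check block now ends in a FAULTING_OP rather than a conditional branch, so rewriteNullChecks() terminates it with an explicit unconditional branch to the not-null successor (see the insertBranch() hunk in ImplicitNullChecks.cpp above); per the comment added there, later block placement is expected to fold such branches back into fallthroughs. A hedged sketch of that step, with illustrative wrapper and variable names:

  static void branchToNotNull(const TargetInstrInfo &TII,
                              MachineBasicBlock &CheckMBB,
                              MachineBasicBlock &NotNullSucc,
                              const DebugLoc &DL) {
    // CheckMBB now ends with the FAULTING_OP; give it a well-formed
    // unconditional terminator and let block placement recover a fallthrough.
    TII.insertBranch(CheckMBB, &NotNullSucc, /*FBB=*/nullptr, /*Cond=*/{}, DL);
  }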
@@ -328,6 +340,8 @@
   ret i32 %t
 }
 
+;; TODO: We could handle this case, since we could hoist the fence into the
+;; previous block, before the conditional, without changing behavior.
 define i32 @imp_null_check_load_fence2(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fence2:
 ; CHECK:       // %bb.0: // %entry
@@ -352,6 +366,7 @@
   ret i32 %t
 }
 
+; TODO: We can fold to an implicit null check here; not sure why this isn't working.
 define void @imp_null_check_store(i32* %x) {
 ; CHECK-LABEL: imp_null_check_store:
 ; CHECK:       // %bb.0: // %entry
@@ -374,6 +389,7 @@
   ret void
 }
 
+;; TODO: Can be made implicit.
 define void @imp_null_check_unordered_store(i32* %x) {
 ; CHECK-LABEL: imp_null_check_unordered_store:
 ; CHECK:       // %bb.0: // %entry
@@ -399,9 +415,10 @@
 define i32 @imp_null_check_neg_gep_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_neg_gep_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB16_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldur w0, [x0, #-128]
+; CHECK-NEXT:  .Ltmp8:
+; CHECK-NEXT:    ldur w0, [x0, #-128] // on-fault: .LBB16_2
+; CHECK-NEXT:    b .LBB16_1
+; CHECK-NEXT:  .LBB16_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB16_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
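Reviewer note (not part of the patch): to close the loop on the first hunk, the BranchRelaxation change matters because a FAULTING_OP reaches its handler through the fault map and a signal rather than through an encoded PC-relative displacement. A hedged restatement as a standalone predicate (the helper name is illustrative):

  static bool mayNeedBranchRelaxation(const MachineInstr &MI) {
    if (!MI.isConditionalBranch())
      return false;
    // FAULTING_OP is treated as a conditional branch so the analyses see its
    // handler edge, but the "branch" is taken via SIGSEGV and the fault map;
    // there is no displacement in the encoding to range-check or relax.
    return MI.getOpcode() != TargetOpcode::FAULTING_OP;
  }

With that exemption in place the pipeline is consistent end to end: ImplicitNullChecks emits the FAULTING_OP, the AArch64 AsmPrinter records it in __llvm_faultmaps, and BranchRelaxation leaves its pseudo-destination alone.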