diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -113,20 +113,20 @@ cl::init(false)); static cl::opt -EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, - cl::desc("Enable AArch64 logical imm instruction " - "optimization"), - cl::init(true)); + EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); // Temporary option added for the purpose of testing functionality added // to DAGCombiner.cpp in D92230. It is expected that this can be removed // in future when both implementations will be based off MGATHER rather // than the GLD1 nodes added for the SVE gather load intrinsics. static cl::opt -EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, - cl::desc("Combine extends of AArch64 masked " - "gather intrinsics"), - cl::init(true)); + EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, + cl::desc("Combine extends of AArch64 masked " + "gather intrinsics"), + cl::init(true)); /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -604,21 +604,45 @@ } if (!Subtarget->hasFullFP16()) { - for (auto Op : - {ISD::SETCC, ISD::SELECT_CC, - ISD::BR_CC, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FDIV, ISD::FMA, - ISD::FNEG, ISD::FABS, ISD::FCEIL, - ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, - ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM, - ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, - ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, - ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT, - ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, - ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, - ISD::STRICT_FMAXIMUM}) + for (auto Op : {ISD::SETCC, + ISD::SELECT_CC, + ISD::BR_CC, + ISD::FADD, + ISD::FSUB, + ISD::FMUL, + ISD::FDIV, + ISD::FMA, + ISD::FNEG, + ISD::FABS, + ISD::FCEIL, + ISD::FSQRT, + ISD::FFLOOR, + ISD::FNEARBYINT, + ISD::FRINT, + ISD::FROUND, + ISD::FROUNDEVEN, + ISD::FTRUNC, + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FMINIMUM, + ISD::FMAXIMUM, + ISD::STRICT_FADD, + ISD::STRICT_FSUB, + ISD::STRICT_FMUL, + ISD::STRICT_FDIV, + ISD::STRICT_FMA, + ISD::STRICT_FCEIL, + ISD::STRICT_FFLOOR, + ISD::STRICT_FSQRT, + ISD::STRICT_FRINT, + ISD::STRICT_FNEARBYINT, + ISD::STRICT_FROUND, + ISD::STRICT_FTRUNC, + ISD::STRICT_FROUNDEVEN, + ISD::STRICT_FMINNUM, + ISD::STRICT_FMAXNUM, + ISD::STRICT_FMINIMUM, + ISD::STRICT_FMAXIMUM}) setOperationAction(Op, MVT::f16, Promote); // Round-to-integer need custom lowering for fp16, as Promote doesn't work @@ -628,53 +652,53 @@ setOperationAction(Op, MVT::f16, Custom); // promote v4f16 to v4f32 when that is known to be safe. 
- setOperationAction(ISD::FADD, MVT::v4f16, Promote); - setOperationAction(ISD::FSUB, MVT::v4f16, Promote); - setOperationAction(ISD::FMUL, MVT::v4f16, Promote); - setOperationAction(ISD::FDIV, MVT::v4f16, Promote); - AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); - - setOperationAction(ISD::FABS, MVT::v4f16, Expand); - setOperationAction(ISD::FNEG, MVT::v4f16, Expand); - setOperationAction(ISD::FROUND, MVT::v4f16, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); - setOperationAction(ISD::FMA, MVT::v4f16, Expand); - setOperationAction(ISD::SETCC, MVT::v4f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); - setOperationAction(ISD::FRINT, MVT::v4f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); - - setOperationAction(ISD::FABS, MVT::v8f16, Expand); - setOperationAction(ISD::FADD, MVT::v8f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); - setOperationAction(ISD::FDIV, MVT::v8f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); - setOperationAction(ISD::FMA, MVT::v8f16, Expand); - setOperationAction(ISD::FMUL, MVT::v8f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); - setOperationAction(ISD::FNEG, MVT::v8f16, Expand); - setOperationAction(ISD::FROUND, MVT::v8f16, Expand); - setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); - setOperationAction(ISD::FRINT, MVT::v8f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); - setOperationAction(ISD::FSUB, MVT::v8f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); - setOperationAction(ISD::SETCC, MVT::v8f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v4f16, Promote); + setOperationAction(ISD::FSUB, MVT::v4f16, Promote); + setOperationAction(ISD::FMUL, MVT::v4f16, Promote); + setOperationAction(ISD::FDIV, MVT::v4f16, Promote); + AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); + + setOperationAction(ISD::FABS, MVT::v4f16, Expand); + setOperationAction(ISD::FNEG, MVT::v4f16, Expand); + setOperationAction(ISD::FROUND, MVT::v4f16, Expand); + setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); + setOperationAction(ISD::FMA, MVT::v4f16, Expand); + setOperationAction(ISD::SETCC, MVT::v4f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); + 
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); + setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); + setOperationAction(ISD::FRINT, MVT::v4f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); + + setOperationAction(ISD::FABS, MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV, MVT::v8f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); + setOperationAction(ISD::FMA, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); + setOperationAction(ISD::FNEG, MVT::v8f16, Expand); + setOperationAction(ISD::FROUND, MVT::v8f16, Expand); + setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); + setOperationAction(ISD::FRINT, MVT::v8f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); + setOperationAction(ISD::SETCC, MVT::v8f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); } // AArch64 has implementations of a lot of rounding-like FP operations. @@ -1025,8 +1049,8 @@ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); } - setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); - setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal); setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal); setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom); @@ -1048,8 +1072,8 @@ setOperationAction(ISD::MUL, MVT::v2i64, Custom); // Saturates - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); @@ -1067,8 +1091,8 @@ } // Vector reductions - for (MVT VT : { MVT::v4f16, MVT::v2f32, - MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { + for (MVT VT : + {MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64}) { if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); @@ -1076,8 +1100,8 @@ setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } } - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, - MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { + for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32}) { setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); @@ -1129,18 +1153,18 @@ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); 
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); // ADDP custom lowering - for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::ADD, VT, Custom); // FADDP custom lowering - for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 }) + for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64}) setOperationAction(ISD::FADD, VT, Custom); } @@ -1229,8 +1253,8 @@ setOperationAction(ISD::BITCAST, VT, Custom); for (auto VT : - { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, - MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) + {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, + MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16}) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); for (auto VT : @@ -1428,8 +1452,8 @@ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); // Int operations with no NEON support. - for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, - MVT::v2i32, MVT::v4i32, MVT::v2i64}) { + for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, + MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::BITREVERSE, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); @@ -1534,8 +1558,7 @@ // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP // NEON types. - if (VT.isFloatingPoint() && - VT.getVectorElementType() != MVT::bf16 && + if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 && (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) for (unsigned Opcode : {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM, @@ -1830,8 +1853,8 @@ if (NewImm == 0 || NewImm == OrigMask) { New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), TLO.DAG.getConstant(NewImm, DL, VT)); - // Otherwise, create a machine node so that target independent DAG combine - // doesn't undo this optimization. + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. } else { Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); @@ -1949,7 +1972,8 @@ ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { - default: return; + default: + return; case Intrinsic::aarch64_ldaxr: case Intrinsic::aarch64_ldxr: { unsigned BitWidth = Known.getBitWidth(); @@ -2412,8 +2436,9 @@ return EndBB; } -MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( - MachineInstr &MI, MachineBasicBlock *BB) const { +MachineBasicBlock * +AArch64TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, + MachineBasicBlock *BB) const { assert(!isAsynchronousEHPersonality(classifyEHPersonality( BB->getParent()->getFunction().getPersonalityFn())) && "SEH does not use catchret!"); @@ -2934,10 +2959,9 @@ // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
- const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl, - DAG.getVTList(VT, MVT_CC), - LHS.getOperand(0), - LHS.getOperand(1)); + const SDValue ANDSNode = + DAG.getNode(AArch64ISD::ANDS, dl, DAG.getVTList(VT, MVT_CC), + LHS.getOperand(0), LHS.getOperand(1)); // Replace all users of (and X, Y) with newly generated (ands X, Y) DAG.ReplaceAllUsesWith(LHS, ANDSNode); return ANDSNode.getValue(1); @@ -3073,11 +3097,11 @@ SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3114,8 +3138,8 @@ /// \p Negate is true if we want this sub-tree being negated just by changing /// SETCC conditions. static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, - AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, - AArch64CC::CondCode Predicate) { + AArch64CC::CondCode &OutCC, bool Negate, + SDValue CCOp, AArch64CC::CondCode Predicate) { // We're at a tree leaf, produce a conditional comparison operation. unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { @@ -3309,8 +3333,7 @@ case ISD::SETGT: if ((VT == MVT::i32 && C != INT32_MAX && isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != INT64_MAX && - isLegalArithImmed(C + 1ULL))) { + (VT == MVT::i64 && C != INT64_MAX && isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); @@ -3381,9 +3404,9 @@ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); + Cmp = emitComparison( + SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl, + DAG); AArch64CC = changeIntCCToAArch64CC(CC); } } @@ -3475,10 +3498,9 @@ } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(AArch64ISD::SUBS, DL, VTs, - DAG.getConstant(0, DL, MVT::i64), - UpperBits).getValue(1); + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, + DAG.getConstant(0, DL, MVT::i64), UpperBits) + .getValue(1); } break; } @@ -3660,8 +3682,8 @@ // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); - Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, - CCVal, Overflow); + Overflow = + DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -3690,10 +3712,10 @@ } // built the mask value encoding the expected behavior. 
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit - (!IsData << 3) | // IsDataCache bit - (Locality << 1) | // Cache level bits - (unsigned)IsStream; // Stream bit + unsigned PrfOp = (IsWrite << 4) | // Load/Store bit + (!IsData << 3) | // IsDataCache bit + (Locality << 1) | // Cache level bits + (unsigned)IsStream; // Stream bit return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); } @@ -3757,8 +3779,7 @@ unsigned NumElts = InVT.getVectorNumElements(); // f16 conversions are promoted to f32 when full fp16 is not supported. - if (InVT.getVectorElementType() == MVT::f16 && - !Subtarget->hasFullFP16()) { + if (InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); if (IsStrict) { @@ -3840,9 +3861,8 @@ return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other}, {Ext.getValue(1), Ext.getValue(0)}); } - return DAG.getNode( - Op.getOpcode(), dl, Op.getValueType(), - DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); } if (SrcVal.getValueType() != MVT::f128) { @@ -4015,8 +4035,7 @@ MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); if (IsStrict) { - In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, - {Op.getOperand(0), In}); + In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, {Op.getOperand(0), In}); return DAG.getNode( ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); @@ -4038,9 +4057,9 @@ // Use a scalar operation for conversions between single-element vectors of // the same size. if (VT.getVectorNumElements() == 1) { - SDValue Extract = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), - In, DAG.getConstant(0, dl, MVT::i64)); + SDValue Extract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), In, + DAG.getConstant(0, dl, MVT::i64)); EVT ScalarVT = VT.getScalarType(); if (IsStrict) return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, @@ -4052,7 +4071,7 @@ } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); @@ -4069,10 +4088,9 @@ ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other}, {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)}); } - return DAG.getNode( - ISD::FP_ROUND, dl, MVT::f16, - DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), + DAG.getIntPtrConstant(0, dl)); } // i128 conversions are libcalls. @@ -4104,8 +4122,8 @@ Entry.IsZExt = false; Args.push_back(Entry); - RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 - : RTLIB::SINCOS_STRET_F32; + RTLIB::Libcall LC = + ArgVT == MVT::f64 ? 
RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; const char *LibcallName = getLibcallName(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); @@ -4176,12 +4194,13 @@ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Vector Type"); + default: + llvm_unreachable("Unexpected Vector Type"); case MVT::v2i8: case MVT::v2i16: - return MVT::v2i32; + return MVT::v2i32; case MVT::v4i8: - return MVT::v4i16; + return MVT::v4i16; } } @@ -4249,10 +4268,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) - return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG, - N->getOperand(0)->getValueType(0), - N->getValueType(0), - N->getOpcode()); + return addRequiredExtensionForVectorMULL( + N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), + N->getValueType(0), N->getOpcode()); assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); @@ -4288,8 +4306,8 @@ if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); - return N0->hasOneUse() && N1->hasOneUse() && - isSignExtended(N0, DAG) && isSignExtended(N1, DAG); + return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) && + isSignExtended(N1, DAG); } return false; } @@ -4299,8 +4317,8 @@ if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); - return N0->hasOneUse() && N1->hasOneUse() && - isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); + return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) && + isZeroExtended(N1, DAG); } return false; } @@ -4405,11 +4423,11 @@ NewOpc = AArch64ISD::SMULL; isMLA = true; } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { - NewOpc = AArch64ISD::UMULL; + NewOpc = AArch64ISD::UMULL; isMLA = true; } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { std::swap(N0, N1); - NewOpc = AArch64ISD::UMULL; + NewOpc = AArch64ISD::UMULL; isMLA = true; } } @@ -4441,11 +4459,12 @@ SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG); SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); - return DAG.getNode(N0->getOpcode(), DL, VT, - DAG.getNode(NewOpc, DL, VT, - DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), - DAG.getNode(NewOpc, DL, VT, - DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); + return DAG.getNode( + N0->getOpcode(), DL, VT, + DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), + Op1), + DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), + Op1)); } static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, @@ -4537,12 +4556,14 @@ } } -SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); switch (IntNo) { - default: return SDValue(); // Don't custom lower most intrinsics. + default: + return SDValue(); // Don't custom lower most intrinsics. 
case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); @@ -4550,8 +4571,8 @@ case Intrinsic::aarch64_neon_abs: { EVT Ty = Op.getValueType(); if (Ty == MVT::i64) { - SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, - Op.getOperand(1)); + SDValue Result = + DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Op.getOperand(1)); Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result); return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result); } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) { @@ -4584,17 +4605,17 @@ Op2); } case Intrinsic::aarch64_neon_smax: - return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_umax: - return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_smin: - return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_neon_umin: - return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_sve_sunpkhi: return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), @@ -4624,8 +4645,8 @@ return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(), Op.getOperand(1)); case Intrinsic::aarch64_sve_tbl: - return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2)); case Intrinsic::aarch64_sve_trn1: return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); @@ -4697,8 +4718,9 @@ return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frinti: - return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); case Intrinsic::aarch64_sve_frintx: return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4706,8 +4728,9 @@ return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frintn: - return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); case Intrinsic::aarch64_sve_frintz: return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4720,13 +4743,11 @@ Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fcvtzu: - return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, 
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fcvtzs: - return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); + return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_fsqrt: return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -4868,8 +4889,8 @@ } case Intrinsic::aarch64_neon_sabd: case Intrinsic::aarch64_neon_uabd: { - unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU - : ISD::ABDS; + unsigned Opcode = + IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU : ISD::ABDS; return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } @@ -5185,9 +5206,8 @@ } // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16. -static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, - EVT VT, EVT MemVT, - SelectionDAG &DAG) { +static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, + EVT MemVT, SelectionDAG &DAG) { assert(VT.isVector() && "VT should be a vector type"); assert(MemVT == MVT::v4i8 && VT == MVT::v4i16); @@ -5201,29 +5221,28 @@ // str s0, [x0] SDValue Undef = DAG.getUNDEF(MVT::i16); - SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL, - {Undef, Undef, Undef, Undef}); + SDValue UndefVec = + DAG.getBuildVector(MVT::v4i16, DL, {Undef, Undef, Undef, Undef}); - SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, - Value, UndefVec); + SDValue TruncExt = + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Value, UndefVec); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt); Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc); SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Trunc, DAG.getConstant(0, DL, MVT::i64)); - return DAG.getStore(ST->getChain(), DL, ExtractTrunc, - ST->getBasePtr(), ST->getMemOperand()); + return DAG.getStore(ST->getChain(), DL, ExtractTrunc, ST->getBasePtr(), + ST->getMemOperand()); } // Custom lowering for any store, vector or scalar and/or default or with // a truncate operations. Currently only custom lower truncate operation // from vector v4i16 to v4i8 or volatile stores of i128. 
-SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc Dl(Op); StoreSDNode *StoreNode = cast(Op); - assert (StoreNode && "Can only custom lower store nodes"); + assert(StoreNode && "Can only custom lower store nodes"); SDValue Value = StoreNode->getValue(); @@ -5283,8 +5302,8 @@ SDValue Base = StoreNode->getBasePtr(); EVT PtrVT = Base.getValueType(); for (unsigned i = 0; i < 8; i++) { - SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, - Value, DAG.getConstant(i, Dl, MVT::i32)); + SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, Value, + DAG.getConstant(i, Dl, MVT::i32)); SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base, DAG.getConstant(i * 8, Dl, PtrVT)); Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(), @@ -5321,8 +5340,7 @@ return Result; } -SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *LoadNode = cast(Op); assert(LoadNode && "Expected custom lowering of a load node"); @@ -5335,9 +5353,9 @@ for (unsigned i = 0; i < 8; i++) { SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, DAG.getConstant(i * 8, DL, PtrVT)); - SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr, - LoadNode->getPointerInfo(), - LoadNode->getOriginalAlign()); + SDValue Part = + DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->getOriginalAlign()); Ops.push_back(Part); Chain = SDValue(Part.getNode(), 1); } @@ -5385,9 +5403,8 @@ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(0)); // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); + SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), + Op.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), Cmp.getValue(1)); @@ -5793,13 +5810,13 @@ return CC_AArch64_DarwinPCS; return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg : CC_AArch64_DarwinPCS_VarArg; - case CallingConv::Win64: + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; - case CallingConv::CFGuard_Check: - return CC_AArch64_Win64_CFGuard_Check; - case CallingConv::AArch64_VectorCall: - case CallingConv::AArch64_SVE_VectorCall: - return CC_AArch64_AAPCS; + case CallingConv::CFGuard_Check: + return CC_AArch64_Win64_CFGuard_Check; + case CallingConv::AArch64_VectorCall: + case CallingConv::AArch64_SVE_VectorCall: + return CC_AArch64_AAPCS; } } @@ -5822,7 +5839,8 @@ SmallVector Outs; GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs, DAG.getTargetLoweringInfo(), MF.getDataLayout()); - if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); })) + if (any_of(Outs, + [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); })) FuncInfo->setIsSVECC(true); // Assign locations to all of the incoming arguments. @@ -5876,10 +5894,10 @@ int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; - // FIXME: This works on big-endian for composite byvals, which are the common - // case. It should also work for fundamental types too. 
+ // FIXME: This works on big-endian for composite byvals, which are the + // common case. It should also work for fundamental types too. unsigned FrameIdx = - MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); + MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); InVals.push_back(FrameIdxN); @@ -5951,7 +5969,8 @@ unsigned ArgOffset = VA.getLocMemOffset(); unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect ? VA.getLocVT().getSizeInBits() - : VA.getValVT().getSizeInBits()) / 8; + : VA.getValVT().getSizeInBits()) / + 8; uint32_t BEAlign = 0; if (!Subtarget->isLittleEndian() && ArgSize < 8 && @@ -5997,7 +6016,7 @@ if (VA.getLocInfo() == CCValAssign::Indirect) { assert(VA.getValVT().isScalableVector() && - "Only scalable vectors can be passed indirectly"); + "Only scalable vectors can be passed indirectly"); uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); unsigned NumParts = 1; @@ -6056,8 +6075,8 @@ // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. - // Win64 variadic functions also pass arguments in registers, but all float - // arguments are passed in integer registers. + // Win64 variadic functions also pass arguments in registers, but all + // float arguments are passed in integer registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -6073,7 +6092,7 @@ RegParmTypes.push_back(MVT::f128); // Compute the set of forwarded registers. The rest are scratch. SmallVectorImpl &Forwards = - FuncInfo->getForwardedMustTailRegParms(); + FuncInfo->getForwardedMustTailRegParms(); CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_AArch64_AAPCS); @@ -6139,13 +6158,14 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); + bool IsWin64 = + Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); SmallVector MemOps; - static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7 }; + static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); @@ -6156,7 +6176,8 @@ GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); if (GPRSaveSize & 15) // The extra size here, if triggered, will always be 8. - MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); + MFI.CreateFixedObject(16 - (GPRSaveSize & 15), + -(int)alignTo(GPRSaveSize, 16), false); } else GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); @@ -6324,9 +6345,9 @@ } } else { // Get type of the original argument. - EVT ActualVT = - TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); + EVT ActualVT = TLI.getValueType(DAG.getDataLayout(), + CLI.Args[Outs[i].OrigArgIndex].Ty, + /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. 
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) @@ -6394,7 +6415,8 @@ return false; } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) return CCMatch; // Externally-defined functions with weak linkage should not be @@ -6450,10 +6472,11 @@ analyzeCallOperands(*this, Subtarget, CLI, CCInfo); if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) { - // When we are musttail, additional checks have been done and we can safely ignore this check - // At least two cases here: if caller is fastcc then we can't have any - // memory arguments (we'd be expected to clean up the stack afterwards). If - // caller is C then we could potentially use its argument area. + // When we are musttail, additional checks have been done and we can safely + // ignore this check At least two cases here: if caller is fastcc then we + // can't have any memory arguments (we'd be expected to clean up the stack + // afterwards). If caller is C then we could potentially use its argument + // area. // FIXME: for now we take the most conservative of these in both cases: // disallow all variadic memory operands. @@ -6570,12 +6593,10 @@ // Check callee args/returns for SVE registers and set calling convention // accordingly. if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) { - bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ - return Out.VT.isScalableVector(); - }); - bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ - return In.VT.isScalableVector(); - }); + bool CalleeOutSVE = any_of( + Outs, [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }); + bool CalleeInSVE = + any_of(Ins, [](ISD::InputArg &In) { return In.VT.isScalableVector(); }); if (CalleeInSVE || CalleeOutSVE) CallConv = CallingConv::AArch64_SVE_VectorCall; @@ -6673,7 +6694,7 @@ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.emplace_back(F.PReg, Val); + RegsToPass.emplace_back(F.PReg, Val); } } @@ -6892,8 +6913,8 @@ // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (auto &RegToPass : RegsToPass) { - Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, - RegToPass.second, InFlag); + Chain = + DAG.getCopyToReg(Chain, DL, RegToPass.first, RegToPass.second, InFlag); InFlag = Chain.getValue(1); } @@ -6947,8 +6968,8 @@ // Add argument registers to the end of the list so that they are known live // into the call. for (auto &RegToPass : RegsToPass) - Ops.push_back(DAG.getRegister(RegToPass.first, - RegToPass.second.getValueType())); + Ops.push_back( + DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
const uint32_t *Mask; @@ -7118,7 +7139,7 @@ Flag = Chain.getValue(1); RetOps.push_back( - DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); + DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -7167,7 +7188,7 @@ N->getOffset(), Flag); } -SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); @@ -7375,8 +7396,7 @@ HiVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); - return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, - LoVar, + return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, LoVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); } @@ -7583,8 +7603,8 @@ // The pointer to the thread's TLS data area is at the TLS Index scaled by 8 // offset into the TLSArray. TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex); - SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, - DAG.getConstant(3, DL, PtrVT)); + SDValue Slot = + DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(3, DL, PtrVT)); SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), MachinePointerInfo()); @@ -8291,7 +8311,7 @@ else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE) FVal = LHS; } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) { - assert (CTVal && CFVal && "Expected constant operands for CSNEG."); + assert(CTVal && CFVal && "Expected constant operands for CSNEG."); // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to // avoid materializing C. AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); @@ -8501,8 +8521,7 @@ return getAddr(JT, DAG); } -SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. SDLoc DL(Op); @@ -8538,7 +8557,7 @@ } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { @@ -8550,7 +8569,7 @@ } SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { AArch64FunctionInfo *FuncInfo = DAG.getMachineFunction().getInfo(); @@ -8676,9 +8695,9 @@ SDLoc DL(Op); unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; unsigned VaListSize = - (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - ? PtrSize - : Subtarget->isTargetILP32() ? 20 : 32; + (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) ? PtrSize + : Subtarget->isTargetILP32() ? 20 + : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); @@ -8750,7 +8769,7 @@ SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), DAG.getIntPtrConstant(1, DL, /*isTarget=*/true)); - SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; + SDValue Ops[] = {NarrowFP, WideFP.getValue(1)}; // Merge the rounded value with the chain output of the load. return DAG.getMergeValues(Ops, DL); } @@ -8794,8 +8813,9 @@ // FIXME? 
Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -Register AArch64TargetLowering:: -getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { +Register +AArch64TargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { Register Reg = MatchRegisterName(RegName); if (AArch64::X1 <= Reg && Reg <= AArch64::X28) { const MCRegisterInfo *MRI = Subtarget->getRegisterInfo(); @@ -8805,8 +8825,8 @@ } if (Reg) return Reg; - report_fatal_error(Twine("Invalid register name \"" - + StringRef(RegName) + "\".")); + report_fatal_error( + Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op, @@ -8904,14 +8924,14 @@ // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the // movw+movk is fused). So we limit up to 2 instrdduction at most. SmallVector Insn; - AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), - Insn); + AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn); unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2)); IsLegal = Insn.size() <= Limit; } LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString() - << " imm value: "; Imm.dump();); + << " imm value: "; + Imm.dump();); return IsLegal; } @@ -8977,8 +8997,8 @@ // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) for (int i = ExtraSteps; i > 0; --i) { - SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, - Flags); + SDValue Step = + DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, Flags); Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } @@ -9007,8 +9027,8 @@ // Newton reciprocal iteration: E * (2 - X * E) // AArch64 reciprocal iteration instruction: (2 - M * N) for (int i = ExtraSteps; i > 0; --i) { - SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, - Estimate, Flags); + SDValue Step = + DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, Estimate, Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } @@ -9060,19 +9080,14 @@ if (ConstraintVT.isFloatingPoint()) return "w"; - if (ConstraintVT.isVector() && - (ConstraintVT.getSizeInBits() == 64 || - ConstraintVT.getSizeInBits() == 128)) + if (ConstraintVT.isVector() && (ConstraintVT.getSizeInBits() == 64 || + ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } -enum PredicateConstraint { - Upl, - Upa, - Invalid -}; +enum PredicateConstraint { Upl, Upa, Invalid }; static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { PredicateConstraint P = PredicateConstraint::Invalid; @@ -9114,7 +9129,7 @@ } } else if (parsePredicateConstraint(Constraint) != PredicateConstraint::Invalid) - return C_RegisterClass; + return C_RegisterClass; return TargetLowering::getConstraintType(Constraint); } @@ -9473,10 +9488,10 @@ int WindowScale; ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), + : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} - bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } + bool operator==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -9562,8 
+9577,8 @@ TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32)); assert((Mask.size() == 8 || Mask.size() == 16) && "Expected a v8i8 or v16i8 Mask"); - TBLOperands.push_back( - DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask)); + TBLOperands.push_back(DAG.getBuildVector( + Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask)); SDValue Shuffle = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, @@ -9654,14 +9669,15 @@ if (!SrcVT.is64BitVector()) { LLVM_DEBUG( - dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " - "for SVE vectors."); + dbgs() + << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " + "for SVE vectors."); return SDValue(); } - Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, - VEXTSrc2, - DAG.getConstant(Imm, dl, MVT::i32)); + Src.ShuffleVec = + DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2, + DAG.getConstant(Imm, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } @@ -9700,8 +9716,8 @@ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); - int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), - VT.getScalarSizeInBits()); + int BitsDefined = + std::min(OrigEltTy.getScalarSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, @@ -9720,12 +9736,12 @@ return SDValue(); } - SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; + SDValue ShuffleOps[] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; - SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], - ShuffleOps[1], Mask); + SDValue Shuffle = + DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump(); @@ -9768,7 +9784,8 @@ // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from // v4i32s. This is really a truncate, which we can construct out of (legal) // concats and truncate nodes. -static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) { +static SDValue ReconstructTruncateFromBuildVector(SDValue V, + SelectionDAG &DAG) { if (V.getValueType() != MVT::v16i8) return SDValue(); assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR"); @@ -10057,8 +10074,8 @@ return true; } -static bool isINSMask(ArrayRef M, int NumInputElements, - bool &DstIsLeft, int &Anomaly) { +static bool isINSMask(ArrayRef M, int NumInputElements, bool &DstIsLeft, + int &Anomaly) { if (M.size() != static_cast(NumInputElements)) return false; @@ -10146,11 +10163,11 @@ /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. ID is the perfect-shuffle -//ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle -//table entry and LHS/RHS are the immediate inputs for this stage of the -//shuffle. -static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, - SDValue V2, unsigned PFEntry, SDValue LHS, +// ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle +// table entry and LHS/RHS are the immediate inputs for this stage of the +// shuffle. 
+static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, + unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; @@ -10385,8 +10402,9 @@ Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst, - V2Cst, DAG.getBuildVector(IndexVT, DL, - makeArrayRef(TBLMask.data(), IndexLen))); + V2Cst, + DAG.getBuildVector(IndexVT, DL, + makeArrayRef(TBLMask.data(), IndexLen))); } } return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); @@ -10591,9 +10609,9 @@ for (unsigned LaneSize : {64U, 32U, 16U}) { unsigned Lane = 0; if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) { - unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 - : LaneSize == 32 ? AArch64ISD::DUPLANE32 - : AArch64ISD::DUPLANE16; + unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 + : LaneSize == 32 ? AArch64ISD::DUPLANE32 + : AArch64ISD::DUPLANE16; // Cast V1 to an integer vector with required lane size MVT NewEltTy = MVT::getIntegerVT(LaneSize); unsigned NewEltCount = VT.getSizeInBits() / LaneSize; @@ -10793,7 +10811,6 @@ return DAG.getNode(ISD::BITCAST, DL, VT, TBL); } - static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); @@ -10818,7 +10835,7 @@ // Try 64-bit splatted SIMD immediate. static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, - const APInt &Bits) { + const APInt &Bits) { if (Bits.getHiBits(64) == Bits.getLoBits(64)) { uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); EVT VT = Op.getValueType(); @@ -10828,8 +10845,8 @@ Value = AArch64_AM::encodeAdvSIMDModImmType10(Value); SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10851,16 +10868,13 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType1(Value); Shift = 0; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType2(Value); Shift = 8; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType3(Value); Shift = 16; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType4(Value); Shift = 24; } @@ -10874,9 +10888,9 @@ DAG.getConstant(Value, dl, MVT::i32), DAG.getConstant(Shift, dl, MVT::i32)); else - Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -10899,8 +10913,7 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType5(Value); Shift = 0; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) { Value = 
AArch64_AM::encodeAdvSIMDModImmType6(Value); Shift = 8; } @@ -10914,9 +10927,9 @@ DAG.getConstant(Value, dl, MVT::i32), DAG.getConstant(Shift, dl, MVT::i32)); else - Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } @@ -10938,17 +10951,16 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType7(Value); Shift = 264; - } - else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) { + } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType8(Value); Shift = 272; } if (isAdvSIMDModImm) { SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32), - DAG.getConstant(Shift, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32), + DAG.getConstant(Shift, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10968,8 +10980,8 @@ Value = AArch64_AM::encodeAdvSIMDModImmType9(Value); SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -10990,17 +11002,16 @@ if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType11(Value); MovTy = isWide ? MVT::v4f32 : MVT::v2f32; - } - else if (isWide && - (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) { + } else if (isWide && + (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) { Value = AArch64_AM::encodeAdvSIMDModImmType12(Value); MovTy = MVT::v2f64; } if (isAdvSIMDModImm) { SDLoc dl(Op); - SDValue Mov = DAG.getNode(NewOp, dl, MovTy, - DAG.getConstant(Value, dl, MVT::i32)); + SDValue Mov = + DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } @@ -11143,16 +11154,15 @@ if (resolveBuildVector(BVN, DefBits, UndefBits)) { SDValue NewOp; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, - DefBits, &LHS)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, - DefBits, &LHS))) + if ((NewOp = + tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)) || + (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS))) return NewOp; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, - UndefBits, &LHS)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, - UndefBits, &LHS))) + if ((NewOp = + tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)) || + (NewOp = + tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS))) return NewOp; } @@ -11162,12 +11172,11 @@ // Normalize the operands of BUILD_VECTOR. The value of constant operands will // be truncated to fit element width. 
-static SDValue NormalizeBuildVector(SDValue Op, - SelectionDAG &DAG) { +static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); - EVT EltTy= VT.getVectorElementType(); + EVT EltTy = VT.getVectorElementType(); if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16) return Op; @@ -11179,8 +11188,7 @@ // (with operands cast to integers), then the only possibilities // are constants and UNDEFs. if (auto *CstLane = dyn_cast(Lane)) { - APInt LowBits(EltTy.getSizeInBits(), - CstLane->getZExtValue()); + APInt LowBits(EltTy.getSizeInBits(), CstLane->getZExtValue()); Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); } else if (Lane.getNode()->isUndef()) { Lane = DAG.getUNDEF(MVT::i32); @@ -11431,8 +11439,9 @@ if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; EVT EltTy = VT.getVectorElementType(); - assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 || - EltTy == MVT::f64) && "Unsupported floating-point vector type"); + assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 || + EltTy == MVT::f64) && + "Unsupported floating-point vector type"); LLVM_DEBUG( dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int " "BITCASTS, and try again\n"); @@ -11971,8 +11980,9 @@ APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || + if (!BVN || + !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElementBits) || SplatBitSize > ElementBits) return false; Cnt = SplatBits.getSExtValue(); @@ -12043,10 +12053,10 @@ if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), DAG.getConstant(Cnt, DL, MVT::i32)); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, - MVT::i32), - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32), + Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) { @@ -12071,10 +12081,9 @@ // negate the shift amount SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(1)); - SDValue NegShiftLeft = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0), - NegShift); + SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Opc, DL, MVT::i32), + Op.getOperand(0), NegShift); return NegShiftLeft; } @@ -12219,7 +12228,7 @@ } assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) || - LHS.getValueType().getVectorElementType() != MVT::f128); + LHS.getValueType().getVectorElementType() != MVT::f128); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. 
@@ -12227,15 +12236,14 @@ bool ShouldInvert; changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); - SDValue Cmp = - EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); + bool NoNaNs = + getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); + SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); if (!Cmp.getNode()) return SDValue(); if (CC2 != AArch64CC::AL) { - SDValue Cmp2 = - EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG); + SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG); if (!Cmp2.getNode()) return SDValue(); @@ -12321,14 +12329,12 @@ case ISD::VECREDUCE_FMAX: { return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), - Src); + DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), Src); } case ISD::VECREDUCE_FMIN: { return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), - Src); + DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), Src); } default: llvm_unreachable("Unhandled reduction"); @@ -12650,7 +12656,7 @@ // The shift can be combined if it matches the size of the value being // loaded (and so reducing the width would make it not match). uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1); - uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8; + uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits() / 8; if (ShiftAmount == Log2_32(LoadBytes)) return false; } @@ -12695,10 +12701,10 @@ const DataLayout &DL = F->getParent()->getDataLayout(); Type *Ty = User->getOperand(0)->getType(); - return !(isFMAFasterThanFMulAndFAdd(*F, Ty) && - isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && - (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath)); + return !( + isFMAFasterThanFMulAndFAdd(*F, Ty) && + isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); } // All 32-bit GPR operations implicitly zero the high-half of the corresponding @@ -12757,14 +12763,15 @@ case Instruction::GetElementPtr: { gep_type_iterator GTI = gep_type_begin(Instr); auto &DL = Ext->getModule()->getDataLayout(); - std::advance(GTI, U.getOperandNo()-1); + std::advance(GTI, U.getOperandNo() - 1); Type *IdxTy = GTI.getIndexedType(); // This extension will end up with a shift because of the scaling factor. // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - + 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) @@ -13363,6 +13370,13 @@ auto Mask = SVI->getShuffleMask(); + // Bail out if every index in the mask is undef: when the shufflevector + // mask is `undef` or `poison`, `Mask` is a vector of all -1s, and carrying + // on would lead to an out-of-bounds read later. + if (llvm::all_of(Mask, [](int Idx) { return Idx == UndefMaskElem; })) { + return false; + } + Type *PtrTy = UseScalable ?
STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()) @@ -13576,8 +13590,8 @@ } // Same encoding for add/sub, just flip the sign. Immed = std::abs(Immed); - bool IsLegal = ((Immed >> 12) == 0 || - ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); + bool IsLegal = + ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); LLVM_DEBUG(dbgs() << "Is " << Immed << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n"); return IsLegal; @@ -13620,7 +13634,8 @@ /// by AM is legal for this target, for a load/store of the specified type. bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS, Instruction *I) const { + unsigned AS, + Instruction *I) const { // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset @@ -13738,15 +13753,13 @@ // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. - static const MCPhysReg ScratchRegs[] = { - AArch64::X16, AArch64::X17, AArch64::LR, 0 - }; + static const MCPhysReg ScratchRegs[] = {AArch64::X16, AArch64::X17, + AArch64::LR, 0}; return ScratchRegs; } -bool -AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, - CombineLevel Level) const { +bool AArch64TargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "Expected shift op"); @@ -13754,8 +13767,8 @@ SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine - // it with shift 'N' to let it be lowered to UBFX. + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not + // combine it with shift 'N' to let it be lowered to UBFX. if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && isa(ShiftLHS.getOperand(1))) { uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); @@ -14063,7 +14076,6 @@ return SDValue(); } - static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -14079,7 +14091,7 @@ SmallVectorImpl &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) - return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(N, 0); // Lower SDIV as SDIV EVT VT = N->getValueType(0); @@ -14177,7 +14189,7 @@ } static bool IsSVECntIntrinsic(SDValue S) { - switch(getIntrinsicID(S.getNode())) { + switch (getIntrinsicID(S.getNode())) { default: break; case Intrinsic::aarch64_sve_cntb: @@ -14409,11 +14421,10 @@ // Allow the scaling to be folded into the `cnt` instruction by preventing // the scaling to be obscured here. This makes it easier to pattern match. - if (IsSVECntIntrinsic(N0) || - (N0->getOpcode() == ISD::TRUNCATE && - (IsSVECntIntrinsic(N0->getOperand(0))))) - if (ConstValue.sge(1) && ConstValue.sle(16)) - return SDValue(); + if (IsSVECntIntrinsic(N0) || (N0->getOpcode() == ISD::TRUNCATE && + (IsSVECntIntrinsic(N0->getOperand(0))))) + if (ConstValue.sge(1) && ConstValue.sle(16)) + return SDValue(); // Multiplication of a power of two plus/minus one can be done more // cheaply as as shift+add/sub. For now, this is true unilaterally. 
If @@ -14565,7 +14576,8 @@ // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); - if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && + N0.hasOneUse() && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); @@ -14577,8 +14589,8 @@ // to use the new Chain. DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); - unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; + unsigned Opcode = (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF + : AArch64ISD::UITOF; return DAG.getNode(Opcode, SDLoc(N), VT, Load); } @@ -14691,7 +14703,8 @@ BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); - int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); + int32_t C = + BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); if (C == -1 || C == 0 || C > FloatBits) return SDValue(); @@ -14790,7 +14803,7 @@ } static SDValue tryCombineToBSL(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); @@ -15034,8 +15047,8 @@ Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0), DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32)); - SDValue And = DAG.getNode(ISD::AND, DL, - UnpkOp->getValueType(0), UnpkOp, Dup); + SDValue And = + DAG.getNode(ISD::AND, DL, UnpkOp->getValueType(0), UnpkOp, Dup); return DAG.getNode(Opc, DL, N->getValueType(0), And); } @@ -15295,8 +15308,7 @@ // with the strict_fadd, but we also need uses of the chain output of the // original strict_fadd to use the chain output of the new strict_fadd as // otherwise it may not be deleted. - SDValue Ret = DAG.getNode(N0->getOpcode(), DL, - {VT, MVT::Other}, + SDValue Ret = DAG.getNode(N0->getOpcode(), DL, {VT, MVT::Other}, {N0->getOperand(0), Extract1, Extract2}); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1)); @@ -15373,9 +15385,8 @@ Ops.push_back(DAG.getUNDEF(MVT::f32)); else { LoadSDNode *LD = cast(V); - SDValue NewLoad = - DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(), - LD->getMemOperand()); + SDValue NewLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), + LD->getBasePtr(), LD->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); Ops.push_back(NewLoad); } @@ -15385,7 +15396,6 @@ } } - // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) @@ -15731,11 +15741,11 @@ } // Returns true if Op is setcc or zext of setcc. 
-static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) { +static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info) { if (isSetCC(Op, Info)) return true; return ((Op.getOpcode() == ISD::ZERO_EXTEND) && - isSetCC(Op->getOperand(0), Info)); + isSetCC(Op->getOperand(0), Info)); } // The folding we want to perform is: @@ -16154,7 +16164,8 @@ if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT)) return SDValue(); - SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); + SDValue SubvectorIdx = + DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext, @@ -16309,8 +16320,7 @@ SelectionDAG &DAG) { SDLoc dl(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), - DAG.getNode(Opc, dl, - N->getOperand(1).getSimpleValueType(), + DAG.getNode(Opc, dl, N->getOperand(1).getSimpleValueType(), N->getOperand(1)), DAG.getConstant(0, dl, MVT::i64)); } @@ -16412,7 +16422,7 @@ case Intrinsic::aarch64_sve_cmphs_wide: case Intrinsic::aarch64_sve_cmphi_wide: case Intrinsic::aarch64_sve_cmplo_wide: - case Intrinsic::aarch64_sve_cmpls_wide: { + case Intrinsic::aarch64_sve_cmpls_wide: { if (auto *CN = dyn_cast(Comparator.getOperand(0))) { uint64_t ImmVal = CN->getZExtValue(); if (ImmVal <= 127) @@ -16967,9 +16977,8 @@ uint64_t BaseOffset = 0; const MachinePointerInfo &PtrInfo = St.getPointerInfo(); - SDValue NewST1 = - DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, - OrigAlignment, St.getMemOperand()->getFlags()); + SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, + OrigAlignment, St.getMemOperand()->getFlags()); // As this in ISel, we will not merge this add which may degrade results. 
if (BasePtr->getOpcode() == ISD::ADD && @@ -17034,10 +17043,10 @@ ContainerVT = getSVEContainerType(ContainerVT); SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other); - SDValue Ops[] = { N->getOperand(0), // Chain - N->getOperand(2), // Pg - N->getOperand(3), // Base - DAG.getValueType(VT) }; + SDValue Ops[] = {N->getOperand(0), // Chain + N->getOperand(2), // Pg + N->getOperand(3), // Base + DAG.getValueType(VT)}; SDValue Load = DAG.getNode(Opc, DL, VTs, Ops); SDValue LoadChain = SDValue(Load.getNode(), 1); @@ -17045,7 +17054,7 @@ if (ContainerVT.isInteger() && (VT != ContainerVT)) Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0)); - return DAG.getMergeValues({ Load, LoadChain }, DL); + return DAG.getMergeValues({Load, LoadChain}, DL); } static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) { @@ -17059,16 +17068,16 @@ auto *MINode = cast(N); SDValue PassThru = DAG.getConstant(0, DL, LoadVT); - SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), - MINode->getOperand(3), DAG.getUNDEF(PtrTy), - MINode->getOperand(2), PassThru, - MINode->getMemoryVT(), MINode->getMemOperand(), - ISD::UNINDEXED, ISD::NON_EXTLOAD, false); + SDValue L = + DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), MINode->getOperand(3), + DAG.getUNDEF(PtrTy), MINode->getOperand(2), PassThru, + MINode->getMemoryVT(), MINode->getMemOperand(), + ISD::UNINDEXED, ISD::NON_EXTLOAD, false); - if (VT.isFloatingPoint()) { - SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) }; - return DAG.getMergeValues(Ops, DL); - } + if (VT.isFloatingPoint()) { + SDValue Ops[] = {DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1)}; + return DAG.getMergeValues(Ops, DL); + } return L; } @@ -17111,12 +17120,11 @@ else SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data); - SDValue Ops[] = { N->getOperand(0), // Chain - SrcNew, - N->getOperand(4), // Base - N->getOperand(3), // Pg - InputVT - }; + SDValue Ops[] = {N->getOperand(0), // Chain + SrcNew, + N->getOperand(4), // Base + N->getOperand(3), // Pg + InputVT}; return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops); } @@ -17268,7 +17276,7 @@ } // Check that all vector element locations were inserted to. if (IndexNotInserted.any()) - return SDValue(); + return SDValue(); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } @@ -17600,11 +17608,12 @@ SDValue Addr = LD->getOperand(1); SDValue Vector = N->getOperand(0); // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = - Addr.getNode()->use_end(); UI != UE; ++UI) { + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); + UI != UE; ++UI) { SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD - || UI.getUse().getResNo() != Addr.getResNo()) + if (User->getOpcode() != ISD::ADD || + UI.getUse().getResNo() != Addr.getResNo()) continue; // If the increment is a constant, it must match the memory ref size. 
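As the trailing comment notes, the base-update search above only folds an add of the address into a post-indexed access when the constant increment equals the size of the memory reference. A tiny standalone sketch of that rule (plain C++, not LLVM code; canFoldPostIncrement and the byte counts are illustrative assumptions):

#include <cassert>
#include <cstdint>

// The write-back form of a load/store advances the base by exactly the
// number of bytes accessed, so only a matching constant add can be folded.
static bool canFoldPostIncrement(uint64_t IncBytes, uint64_t AccessBytes) {
  return IncBytes == AccessBytes;
}

int main() {
  assert(canFoldPostIncrement(16, 16));   // e.g. a 16-byte access with a "#16" write-back
  assert(!canFoldPostIncrement(8, 16));   // mismatched increment stays a separate add
  return 0;
}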
@@ -17630,19 +17639,19 @@ continue; SmallVector Ops; - Ops.push_back(LD->getOperand(0)); // Chain + Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { - Ops.push_back(Vector); // The vector to be inserted - Ops.push_back(Lane); // The lane to be inserted in the vector + Ops.push_back(Vector); // The vector to be inserted + Ops.push_back(Lane); // The lane to be inserted in the vector } Ops.push_back(Addr); Ops.push_back(Inc); - EVT Tys[3] = { VT, MVT::i64, MVT::Other }; + EVT Tys[3] = {VT, MVT::i64, MVT::Other}; SDVTList SDTys = DAG.getVTList(Tys); - unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; - SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, - MemVT, + unsigned NewOp = + IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; + SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT, LoadSDN->getMemOperand()); // Update the uses. @@ -17651,8 +17660,8 @@ SDValue(UpdN.getNode(), 2) // Chain }; DCI.CombineTo(LD, NewResults); - DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result - DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register + DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result + DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register break; } @@ -17936,7 +17945,8 @@ // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + UE = Addr.getNode()->use_end(); + UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) @@ -17961,49 +17971,110 @@ unsigned NumVecs = 0; unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post; - NumVecs = 2; IsStore = true; break; - case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post; - NumVecs = 3; IsStore = true; break; - case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post; - NumVecs = 4; IsStore = true; break; - case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post; - NumVecs = 2; IsStore = true; break; - case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post; - NumVecs = 3; IsStore = true; break; - case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post; - NumVecs = 4; IsStore = true; break; - case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost; - NumVecs = 2; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost; - NumVecs = 3; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost; - NumVecs = 4; IsDupOp = true; break; - case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost; - NumVecs = 2; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_ld3lane: NewOpc = 
AArch64ISD::LD3LANEpost; - NumVecs = 3; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost; - NumVecs = 4; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost; - NumVecs = 2; IsStore = true; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost; - NumVecs = 3; IsStore = true; IsLaneOp = true; break; - case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost; - NumVecs = 4; IsStore = true; IsLaneOp = true; break; + default: + llvm_unreachable("unexpected intrinsic for Neon base update"); + case Intrinsic::aarch64_neon_ld2: + NewOpc = AArch64ISD::LD2post; + NumVecs = 2; + break; + case Intrinsic::aarch64_neon_ld3: + NewOpc = AArch64ISD::LD3post; + NumVecs = 3; + break; + case Intrinsic::aarch64_neon_ld4: + NewOpc = AArch64ISD::LD4post; + NumVecs = 4; + break; + case Intrinsic::aarch64_neon_st2: + NewOpc = AArch64ISD::ST2post; + NumVecs = 2; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st3: + NewOpc = AArch64ISD::ST3post; + NumVecs = 3; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st4: + NewOpc = AArch64ISD::ST4post; + NumVecs = 4; + IsStore = true; + break; + case Intrinsic::aarch64_neon_ld1x2: + NewOpc = AArch64ISD::LD1x2post; + NumVecs = 2; + break; + case Intrinsic::aarch64_neon_ld1x3: + NewOpc = AArch64ISD::LD1x3post; + NumVecs = 3; + break; + case Intrinsic::aarch64_neon_ld1x4: + NewOpc = AArch64ISD::LD1x4post; + NumVecs = 4; + break; + case Intrinsic::aarch64_neon_st1x2: + NewOpc = AArch64ISD::ST1x2post; + NumVecs = 2; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st1x3: + NewOpc = AArch64ISD::ST1x3post; + NumVecs = 3; + IsStore = true; + break; + case Intrinsic::aarch64_neon_st1x4: + NewOpc = AArch64ISD::ST1x4post; + NumVecs = 4; + IsStore = true; + break; + case Intrinsic::aarch64_neon_ld2r: + NewOpc = AArch64ISD::LD2DUPpost; + NumVecs = 2; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld3r: + NewOpc = AArch64ISD::LD3DUPpost; + NumVecs = 3; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld4r: + NewOpc = AArch64ISD::LD4DUPpost; + NumVecs = 4; + IsDupOp = true; + break; + case Intrinsic::aarch64_neon_ld2lane: + NewOpc = AArch64ISD::LD2LANEpost; + NumVecs = 2; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_ld3lane: + NewOpc = AArch64ISD::LD3LANEpost; + NumVecs = 3; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_ld4lane: + NewOpc = AArch64ISD::LD4LANEpost; + NumVecs = 4; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st2lane: + NewOpc = AArch64ISD::ST2LANEpost; + NumVecs = 2; + IsStore = true; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st3lane: + NewOpc = AArch64ISD::ST3LANEpost; + NumVecs = 3; + IsStore = true; + IsLaneOp = true; + break; + case Intrinsic::aarch64_neon_st4lane: + NewOpc = AArch64ISD::ST4LANEpost; + NumVecs = 4; + IsStore = true; + IsLaneOp = true; + break; } EVT VecTy; @@ -18038,14 +18109,14 @@ unsigned n; for (n = 0; n < NumResultVecs; ++n) Tys[n] = VecTy; - Tys[n++] = MVT::i64; // Type of write back register - Tys[n] = MVT::Other; // Type of the chain + Tys[n++] = MVT::i64; // Type of write back register + Tys[n] = MVT::Other; // Type of the chain SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2)); MemIntrinsicSDNode *MemInt = cast(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops, - MemInt->getMemoryVT(), - MemInt->getMemOperand()); + SDValue UpdN = + 
DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops, + MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. std::vector NewResults; @@ -18063,16 +18134,16 @@ // Checks to see if the value is the prescribed width and returns information // about its extension mode. -static -bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) { +static bool checkValueWidth(SDValue V, unsigned width, + ISD::LoadExtType &ExtType) { ExtType = ISD::NON_EXTLOAD; - switch(V.getNode()->getOpcode()) { + switch (V.getNode()->getOpcode()) { default: return false; case ISD::LOAD: { LoadSDNode *LoadNode = cast(V.getNode()); - if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) - || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { + if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) || + (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { ExtType = LoadNode->getExtensionType(); return true; } @@ -18080,8 +18151,8 @@ } case ISD::AssertSext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8 && width == 8) - || (TypeNode->getVT() == MVT::i16 && width == 16)) { + if ((TypeNode->getVT() == MVT::i8 && width == 8) || + (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::SEXTLOAD; return true; } @@ -18089,8 +18160,8 @@ } case ISD::AssertZext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); - if ((TypeNode->getVT() == MVT::i8 && width == 8) - || (TypeNode->getVT() == MVT::i16 && width == 16)) { + if ((TypeNode->getVT() == MVT::i8 && width == 8) || + (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::ZEXTLOAD; return true; } @@ -18181,9 +18252,9 @@ // the whole range we can just adjust the input and avoid writing equations // for sign extended inputs. if (ExtType == ISD::SEXTLOAD) - AddConstant -= (1 << (width-1)); + AddConstant -= (1 << (width - 1)); - switch(CC) { + switch (CC) { case AArch64CC::LE: case AArch64CC::GT: if ((AddConstant == 0) || @@ -18194,22 +18265,20 @@ break; case AArch64CC::LT: case AArch64CC::GE: - if ((AddConstant == 0) || - (AddConstant >= 0 && CompConstant <= 0) || + if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant)) return true; break; case AArch64CC::HI: case AArch64CC::LS: if ((AddConstant >= 0 && CompConstant < 0) || - (AddConstant <= 0 && CompConstant >= -1 && - CompConstant < AddConstant + MaxUInt)) + (AddConstant <= 0 && CompConstant >= -1 && + CompConstant < AddConstant + MaxUInt)) return true; - break; + break; case AArch64CC::PL: case AArch64CC::MI: - if ((AddConstant == 0) || - (AddConstant > 0 && CompConstant <= 0) || + if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) || (AddConstant < 0 && CompConstant <= AddConstant)) return true; break; @@ -18242,11 +18311,10 @@ return false; } -static -SDValue performCONDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, unsigned CCIndex, - unsigned CmpIndex) { +static SDValue performCONDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, unsigned CCIndex, + unsigned CmpIndex) { unsigned CC = cast(N->getOperand(CCIndex))->getSExtValue(); SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); @@ -18296,19 +18364,20 @@ if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) || !checkValueWidth(AddInputValue2, MaskBits, ExtType) || - !checkValueWidth(AddInputValue1, MaskBits, ExtType) ) + !checkValueWidth(AddInputValue1, 
MaskBits, ExtType)) return SDValue(); - if(!isEquivalentMaskless(CC, MaskBits, ExtType, - cast(AddInputValue2.getNode())->getSExtValue(), - cast(SubsInputValue.getNode())->getSExtValue())) + if (!isEquivalentMaskless( + CC, MaskBits, ExtType, + cast(AddInputValue2.getNode())->getSExtValue(), + cast(SubsInputValue.getNode())->getSExtValue())) return SDValue(); // The AND is not necessary, remove it. - SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0), - SubsNode->getValueType(1)); - SDValue Ops[] = { AddValue, SubsNode->getOperand(1) }; + SDVTList VTs = + DAG.getVTList(SubsNode->getValueType(0), SubsNode->getValueType(1)); + SDValue Ops[] = {AddValue, SubsNode->getOperand(1)}; SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops); DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode()); @@ -18482,7 +18551,7 @@ // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 if (SDValue Folded = foldCSELofCTTZ(N, DAG)) - return Folded; + return Folded; return performCONDCombine(N, DCI, DAG, 2, 3); } @@ -18913,8 +18982,7 @@ SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, - IfTrue, IfFalse); + return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, IfTrue, IfFalse); } /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with @@ -18969,17 +19037,15 @@ // First perform a vector comparison, where lane 0 is the one we're interested // in. SDLoc DL(N0); - SDValue LHS = - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0)); - SDValue RHS = - DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1)); + SDValue LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0)); + SDValue RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1)); SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2)); // Now duplicate the comparison mask we want across all other lanes. SmallVector DUPMask(CCVT.getVectorNumElements(), 0); SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask); - Mask = DAG.getNode(ISD::BITCAST, DL, - ResVT.changeVectorElementTypeToInteger(), Mask); + Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(), + Mask); return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } @@ -19326,8 +19392,8 @@ // Sign extend of an unsigned unpack -> signed unpack if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) { - unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI - : AArch64ISD::SUNPKLO; + unsigned SOpc = + Opc == AArch64ISD::UUNPKHI ? 
AArch64ISD::SUNPKHI : AArch64ISD::SUNPKLO; // Push the sign extend to the operand of the unpack // This is necessary where, for example, the operand of the unpack @@ -20313,16 +20379,17 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDLoc dl(V.getNode()); SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64); - SDValue VHi = DAG.getAnyExtOrTrunc( - DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)), - dl, MVT::i64); + SDValue VHi = + DAG.getAnyExtOrTrunc(DAG.getNode(ISD::SRL, dl, MVT::i128, V, + DAG.getConstant(64, dl, MVT::i64)), + dl, MVT::i64); if (DAG.getDataLayout().isBigEndian()) - std::swap (VLo, VHi); + std::swap(VLo, VHi); SDValue RegClass = DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32); - const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; + const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1}; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } @@ -20341,8 +20408,8 @@ SDValue Ops[] = { createGPRPairNode(DAG, N->getOperand(2)), // Compare value createGPRPairNode(DAG, N->getOperand(3)), // Store value - N->getOperand(1), // Ptr - N->getOperand(0), // Chain in + N->getOperand(1), // Ptr + N->getOperand(0), // Chain in }; unsigned Opcode; @@ -20494,8 +20561,8 @@ if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) || LoadNode->getMemoryVT() != MVT::i128) { - // Non-volatile or atomic loads are optimized later in AArch64's load/store - // optimizer. + // Non-volatile or atomic loads are optimized later in AArch64's + // load/store optimizer. return; } @@ -20534,30 +20601,30 @@ case Intrinsic::aarch64_sve_clasta_n: { SDLoc DL(N); auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); - auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, - N->getOperand(1), Op2, N->getOperand(3)); + auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, N->getOperand(1), + Op2, N->getOperand(3)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_clastb_n: { SDLoc DL(N); auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); - auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, - N->getOperand(1), Op2, N->getOperand(3)); + auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, N->getOperand(1), + Op2, N->getOperand(3)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_lasta: { SDLoc DL(N); - auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, - N->getOperand(1), N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, N->getOperand(1), + N->getOperand(2)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } case Intrinsic::aarch64_sve_lastb: { SDLoc DL(N); - auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, - N->getOperand(1), N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, N->getOperand(1), + N->getOperand(2)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } @@ -20650,7 +20717,8 @@ return AtomicExpansionKind::CmpXChg; unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - if (Size > 128) return AtomicExpansionKind::None; + if (Size > 128) + return AtomicExpansionKind::None; // Nand is not supported in LSE. // Leave 128 bits to LLSC or CmpXChg. 
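createGPRPairNode, reformatted above, feeds an i128 value into an XSeqPairs register pair: the low half is the truncation, the high half is the value shifted right by 64, and the two halves are swapped on big-endian targets. A standalone sketch of that split (plain C++17 using the Clang/GCC __int128 extension; splitI128 is a hypothetical name for illustration):

#include <cassert>
#include <cstdint>
#include <utility>

// Split a 128-bit value into (lo, hi) 64-bit halves, swapping for
// big-endian so the pair sits in the order the register sequence expects.
static std::pair<uint64_t, uint64_t> splitI128(unsigned __int128 V,
                                               bool IsBigEndian) {
  uint64_t Lo = static_cast<uint64_t>(V);
  uint64_t Hi = static_cast<uint64_t>(V >> 64);
  if (IsBigEndian)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}

int main() {
  unsigned __int128 V =
      static_cast<unsigned __int128>(0x0123456789ABCDEFULL) << 64 |
      0x1122334455667788ULL;
  auto [Lo, Hi] = splitI128(V, /*IsBigEndian=*/false);
  assert(Lo == 0x1122334455667788ULL && Hi == 0x0123456789ABCDEFULL);
  return 0;
}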
@@ -20732,7 +20800,7 @@ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64"); } - Type *Tys[] = { Addr->getType() }; + Type *Tys[] = {Addr->getType()}; Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys); @@ -20776,11 +20844,12 @@ Intrinsic::ID Int = IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr; - Type *Tys[] = { Addr->getType() }; + Type *Tys[] = {Addr->getType()}; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); const DataLayout &DL = M->getDataLayout(); - IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); + IntegerType *IntValTy = + Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); Val = Builder.CreateBitCast(Val, IntValTy); CallInst *CI = Builder.CreateCall( @@ -20893,7 +20962,7 @@ // may be beneficial to sink in other cases, but we would have to check that // the cmp would not get folded into the br to form a cbz for these to be // beneficial. - ConstantInt* Mask = dyn_cast(AndI.getOperand(1)); + ConstantInt *Mask = dyn_cast(AndI.getOperand(1)); if (!Mask) return false; return Mask->getValue().isPowerOf2(); @@ -20952,9 +21021,9 @@ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction().hasFnAttribute( - Attribute::NoUnwind) && - "Function should be nounwind in insertCopiesSplitCSR!"); + assert( + Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); @@ -21029,9 +21098,7 @@ } // Unlike X86, we let frame lowering assign offsets to all catch objects. -bool AArch64TargetLowering::needsFixedCatchObjects() const { - return false; -} +bool AArch64TargetLowering::needsFixedCatchObjects() const { return false; } bool AArch64TargetLowering::shouldLocalize( const MachineInstr &MI, const TargetTransformInfo *TTI) const { @@ -21372,7 +21439,8 @@ SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32); SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT); - SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2); + SDValue Res = + DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2); if (Negated) Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res); @@ -21416,16 +21484,16 @@ Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo); Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi); Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi); - SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Lo, Op1Lo); - SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Hi, Op1Hi); + SDValue ResultLo = + DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, Op0Lo, Op1Lo); + SDValue ResultHi = + DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, Op0Hi, Op1Hi); // Convert again to scalable vectors to truncate. 
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo); ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi); - SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, - ResultLo, ResultHi); + SDValue ScalableResult = + DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, ResultLo, ResultHi); return convertFromScalableVector(DAG, VT, ScalableResult); } @@ -21576,9 +21644,9 @@ SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((!V.getValueType().isVector() || - V.getValueType().isScalableVector()) && - "Only scalable vectors are supported!"); + assert( + (!V.getValueType().isVector() || V.getValueType().isScalableVector()) && + "Only scalable vectors are supported!"); Operands.push_back(V); } @@ -21619,8 +21687,9 @@ return convertFromScalableVector(DAG, VT, ScalableRes); } -SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, + SelectionDAG &DAG) const { SDLoc DL(ScalarOp); SDValue AccOp = ScalarOp.getOperand(0); SDValue VecOp = ScalarOp.getOperand(1); @@ -21641,14 +21710,15 @@ DAG.getUNDEF(ContainerVT), AccOp, Zero); // Perform reduction. - SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, - Pg, AccOp, VecOp); + SDValue Rdx = + DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, Pg, AccOp, VecOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero); } -SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, + SelectionDAG &DAG) const { SDLoc DL(ReduceOp); SDValue Op = ReduceOp.getOperand(0); EVT OpVT = Op.getValueType(); @@ -21705,16 +21775,16 @@ } // UADDV always returns an i64 result. - EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 : - SrcVT.getVectorElementType(); + EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 + : SrcVT.getVectorElementType(); EVT RdxVT = SrcVT; if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED) RdxVT = getPackedSVEVectorVT(ResVT); SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, - Rdx, DAG.getConstant(0, DL, MVT::i64)); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, + DAG.getConstant(0, DL, MVT::i64)); // The VEC_REDUCE nodes expect an element size result. 
if (ResVT != ScalarOp.getValueType()) @@ -21723,9 +21793,8 @@ return Res; } -SDValue -AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE( + SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -21742,8 +21811,7 @@ Mask = DAG.getNode(ISD::TRUNCATE, DL, MaskContainerVT.changeVectorElementType(MVT::i1), Mask); - auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, - Mask, Op1, Op2); + auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, Mask, Op1, Op2); return convertFromScalableVector(DAG, VT, ScalableRes); } @@ -21832,16 +21900,16 @@ SDValue Pg = getPredicateForVector(DAG, DL, VT); EVT SrcVT = Val.getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - EVT ExtendVT = ContainerVT.changeVectorElementType( - SrcVT.getVectorElementType()); + EVT ExtendVT = + ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val); Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val); Val = getSVESafeBitCast(ExtendVT, Val, DAG); - Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, - Pg, Val, DAG.getUNDEF(ContainerVT)); + Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, Pg, + Val, DAG.getUNDEF(ContainerVT)); return convertFromScalableVector(DAG, VT, Val); } @@ -21856,8 +21924,8 @@ SDValue Val = Op.getOperand(0); EVT SrcVT = Val.getValueType(); EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT); - EVT RoundVT = ContainerSrcVT.changeVectorElementType( - VT.getVectorElementType()); + EVT RoundVT = + ContainerSrcVT.changeVectorElementType(VT.getVectorElementType()); SDValue Pg = getPredicateForVector(DAG, DL, RoundVT); Val = convertToScalableVector(DAG, ContainerSrcVT, Val); @@ -21934,7 +22002,7 @@ if (ContainerSrcVT.getVectorElementType().getSizeInBits() <= ContainerDstVT.getVectorElementType().getSizeInBits()) { EVT CvtVT = ContainerDstVT.changeVectorElementType( - ContainerSrcVT.getVectorElementType()); + ContainerSrcVT.getVectorElementType()); SDValue Pg = getPredicateForVector(DAG, DL, VT); Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); @@ -22088,8 +22156,8 @@ SDLoc DL(Op); EVT InVT = Op.getValueType(); - assert(VT.isScalableVector() && isTypeLegal(VT) && - InVT.isScalableVector() && isTypeLegal(InVT) && + assert(VT.isScalableVector() && isTypeLegal(VT) && InVT.isScalableVector() && + isTypeLegal(InVT) && "Only expect to cast between legal scalable vector types!"); assert(VT.getVectorElementType() != MVT::i1 && InVT.getVectorElementType() != MVT::i1 && diff --git a/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -opaque-pointers < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +define void @f_undef(<8 x i64> %a, ptr %dst) { +; CHECK-LABEL: f_undef: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ret +BB: + %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> undef + store <16 x i64> %S, ptr %dst, align 64 + 
ret void +} + +define void @f_poison(<8 x i64> %a, ptr %dst) { +; CHECK-LABEL: f_poison: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ret +BB: + %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> poison + store <16 x i64> %S, ptr %dst, align 64 + ret void +}
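The two tests above exercise the guard added earlier in the interleaved-store lowering: a shufflevector whose mask is undef or poison reaches the lowering as a mask of all -1s, and the code now bails out early instead of hitting the out-of-bounds read described in the new comment. A standalone sketch of the same predicate (plain C++, not LLVM code; kUndefMaskElem and isAllUndefMask are illustrative stand-ins for LLVM's UndefMaskElem sentinel and the llvm::all_of call used in the patch):

#include <algorithm>
#include <cassert>
#include <vector>

// Stand-in for LLVM's UndefMaskElem sentinel used in shuffle masks.
constexpr int kUndefMaskElem = -1;

// True when every mask element is undef, i.e. there is no real index to
// drive an interleaved store, so the transformation must be skipped.
static bool isAllUndefMask(const std::vector<int> &Mask) {
  return std::all_of(Mask.begin(), Mask.end(),
                     [](int Idx) { return Idx == kUndefMaskElem; });
}

int main() {
  assert(isAllUndefMask({-1, -1, -1, -1}));  // undef/poison mask: skip
  assert(!isAllUndefMask({0, 8, 1, 9}));     // a real interleave mask: proceed
  return 0;
}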