Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -196,11 +196,12 @@
 public:
   /// \name Scalar TTI Implementations
   /// @{
-  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
-                                      unsigned BitWidth, unsigned AddressSpace,
-                                      unsigned Alignment, bool *Fast) const {
+  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+                                      unsigned AddressSpace, unsigned Alignment,
+                                      bool *Fast) const {
     EVT E = EVT::getIntegerVT(Context, BitWidth);
-    return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
+    return getTLI()->allowsMisalignedMemoryAccesses(
+        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
   }
 
   bool hasBranchDivergence() { return false; }
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -1415,10 +1415,10 @@
   /// copy/move/set is converted to a sequence of store operations. Its use
   /// helps to ensure that such replacements don't generate code that causes an
   /// alignment error (trap) on the target machine.
-  virtual bool allowsMisalignedMemoryAccesses(EVT,
-                                              unsigned AddrSpace = 0,
-                                              unsigned Align = 1,
-                                              bool * /*Fast*/ = nullptr) const {
+  virtual bool allowsMisalignedMemoryAccesses(
+      EVT, unsigned AddrSpace = 0, unsigned Align = 1,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool * /*Fast*/ = nullptr) const {
     return false;
   }
 
@@ -1426,8 +1426,18 @@
   /// given address space and alignment. If the access is allowed, the optional
   /// final parameter returns if the access is also fast (as defined by the
   /// target).
+  bool
+  allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+                     unsigned AddrSpace = 0, unsigned Alignment = 1,
+                     MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+                     bool *Fast = nullptr) const;
+
+  /// Return true if the target supports a memory access of this type for the
+  /// given MachineMemOperand. If the access is allowed, the optional
+  /// final parameter returns if the access is also fast (as defined by the
+  /// target).
   bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
-                          unsigned AddrSpace = 0, unsigned Alignment = 1,
+                          const MachineMemOperand &MMO,
                           bool *Fast = nullptr) const;
 
   /// Returns the target specific optimal type for load and store operations as
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4607,7 +4607,8 @@
   // Ensure that this isn't going to produce an unsupported unaligned access.
   if (ShAmt &&
       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                              LDST->getAddressSpace(), ShAmt / 8))
+                              LDST->getAddressSpace(), ShAmt / 8,
+                              LDST->getMemOperand()->getFlags()))
     return false;
 
   // It's not possible to generate a constant of extended or untyped type.
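As a quick illustration of the new TargetLowering API above (not something this patch itself adds), a DAG-combine-style call site can now hand the whole MachineMemOperand to allowsMemoryAccess instead of passing the address space and alignment separately, so MachineMemOperand::Flags such as MOVolatile reach the target hook. The helper below is a minimal sketch; the function name canUseWideAccess is invented for the example.

// Sketch only: mirrors the call-site pattern in the DAGCombiner hunks below.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static bool canUseWideAccess(const TargetLowering &TLI, SelectionDAG &DAG,
                             EVT WideVT, LSBaseSDNode *LDST) {
  bool Fast = false;
  // Old form: TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
  //                                  WideVT, LDST->getAddressSpace(),
  //                                  LDST->getAlignment(), &Fast);
  // New form: pass the MachineMemOperand itself; the base class forwards its
  // address space, alignment, and flags to allowsMisalignedMemoryAccesses.
  return TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), WideVT,
                                *LDST->getMemOperand(), &Fast) &&
         Fast;
}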
@@ -6408,9 +6409,9 @@
 
   // Check that a store of the wide type is both allowed and fast on the target
   bool Fast = false;
-  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstStore->getAddressSpace(),
-                                        FirstStore->getAlignment(), &Fast);
+  bool Allowed =
+      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+                             *FirstStore->getMemOperand(), &Fast);
   if (!Allowed || !Fast)
     return SDValue();
 
@@ -6573,8 +6574,7 @@
   // Check that a load of the wide type is both allowed and fast on the target
   bool Fast = false;
   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstLoad->getAddressSpace(),
-                                        FirstLoad->getAlignment(), &Fast);
+                                        VT, *FirstLoad->getMemOperand(), &Fast);
   if (!Allowed || !Fast)
     return SDValue();
 
@@ -10797,15 +10797,14 @@
        TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned OrigAlign = LN0->getAlignment();
 
     bool Fast = false;
     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
+                               *LN0->getMemOperand(), &Fast) &&
         Fast) {
       SDValue Load =
           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
-                      LN0->getPointerInfo(), OrigAlign,
+                      LN0->getPointerInfo(), LN0->getAlignment(),
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
       return Load;
@@ -15408,8 +15407,8 @@
       if (TLI.isTypeLegal(StoreTy) &&
           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                 FirstStoreAlign, &IsFast) &&
+          TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast) {
        LastIntegerTrunc = false;
        LastLegalType = i + 1;
@@ -15420,8 +15419,9 @@
            TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign, &IsFast) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(),
+                                   &IsFast) &&
            IsFast) {
          LastIntegerTrunc = true;
          LastLegalType = i + 1;
@@ -15439,8 +15439,8 @@
        EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
        if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                   FirstStoreAlign, &IsFast) &&
+            TLI.allowsMemoryAccess(
+                Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast)
          LastLegalVectorType = i + 1;
      }
@@ -15511,8 +15511,8 @@
 
      if (TLI.isTypeLegal(Ty) &&
          TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-          TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                 FirstStoreAlign, &IsFast) &&
+          TLI.allowsMemoryAccess(Context, DL, Ty,
+                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast)
        NumStoresToMerge = i + 1;
    }
@@ -15603,7 +15603,6 @@
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
-    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
    unsigned FirstLoadAlign = FirstLoad->getAlignment();
 
    // Scan the memory operations on the chain and find the first
@@ -15643,11 +15642,11 @@
        bool IsFastSt, IsFastLd;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign,
&IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } @@ -15657,11 +15656,11 @@ StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = false; @@ -15676,11 +15675,12 @@ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), + &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; DoIntegerTruncate = true; @@ -15931,13 +15931,12 @@ if (((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { - unsigned OrigAlign = ST->getAlignment(); bool Fast = false; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, - ST->getAddressSpace(), OrigAlign, &Fast) && + *ST->getMemOperand(), &Fast) && Fast) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), OrigAlign, + ST->getPointerInfo(), ST->getAlignment(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); } } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -492,10 +492,9 @@ // If this is an unaligned store and the target doesn't support it, // expand it. EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -607,11 +606,10 @@ default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -668,13 +666,12 @@ default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; } @@ -860,10 +857,9 @@ // If this is an unaligned load and the target doesn't support it, // expand it. EVT MemVT = LD->getMemoryVT(); - unsigned AS = LD->getAddressSpace(); - unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); } } Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -238,7 +238,8 @@ // issuing a (or a pair of) unaligned and overlapping load / store. bool Fast; if (NumMemOps && AllowOverlap && NewVTSize < Size && - allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && + allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, + MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; else { Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -1464,6 +1464,7 @@ const DataLayout &DL, EVT VT, unsigned AddrSpace, unsigned Alignment, + MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution @@ -1479,7 +1480,15 @@ } // This is a misaligned access. - return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast) const { + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), Fast); } BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const { Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -262,9 +262,10 @@ /// Returns true if the target allows unaligned memory accesses of the /// specified type. 
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, - unsigned Align = 1, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace = 0, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; /// Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1074,10 +1074,9 @@ return MVT::i64; } -bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *Fast) const { +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast) const { if (Subtarget->requiresStrictAlign()) return false; @@ -2843,7 +2842,8 @@ unsigned AS = StoreNode->getAddressSpace(); unsigned Align = StoreNode->getAlignment(); if (Align < MemVT.getStoreSize() && - !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { + !allowsMisalignedMemoryAccesses( + MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) { return scalarizeVectorStore(StoreNode, DAG); } @@ -8716,7 +8716,9 @@ if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) return true; bool Fast; - return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast; + return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, + &Fast) && + Fast; }; if (CanUseNEON && IsMemset && !IsSmallMemset && Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2965,7 +2965,8 @@ // Expand unaligned loads earlier than legalization. Due to visitation order // problems during legalization, the emitted instructions to pack and unpack // the bytes again are not eliminated in the case of an unaligned copy. - if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (!allowsMisalignedMemoryAccesses( + VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return scalarizeVectorLoad(LN, DAG); @@ -3017,7 +3018,8 @@ // order problems during legalization, the emitted instructions to pack and // unpack the bytes again are not eliminated in the case of an unaligned // copy. 
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) { + if (!allowsMisalignedMemoryAccesses( + VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) { if (VT.isVector()) return scalarizeVectorStore(SN, DAG); Index: lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.h +++ lib/Target/AMDGPU/R600ISelLowering.h @@ -49,9 +49,10 @@ bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, - unsigned Align, - bool *IsFast) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const override; private: unsigned Gen; Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1261,7 +1261,8 @@ unsigned Align = StoreNode->getAlignment(); if (Align < MemVT.getStoreSize() && - !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { + !allowsMisalignedMemoryAccesses( + MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) { return expandUnalignedStore(StoreNode, DAG); } @@ -1663,10 +1664,9 @@ return true; } -bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *IsFast) const { +bool R600TargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { if (IsFast) *IsFast = false; Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -233,9 +233,10 @@ bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, - unsigned Align, - bool *IsFast) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1157,10 +1157,9 @@ return true; } -bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { if (IsFast) *IsFast = false; @@ -6756,14 +6755,15 @@ assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - unsigned Alignment = Load->getAlignment(); - unsigned AS = Load->getAddressSpace(); if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - AS, Alignment)) { + *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, DL); } + + unsigned Alignment = Load->getAlignment(); + unsigned AS = Load->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == 
AMDGPUAS::FLAT_ADDRESS && Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) { @@ -7224,12 +7224,12 @@ assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - unsigned AS = Store->getAddressSpace(); if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - AS, Store->getAlignment())) { + *Store->getMemOperand())) { return expandUnalignedStore(Store, DAG); } + unsigned AS = Store->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) { Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -321,6 +321,7 @@ /// is "fast" by reference in the second argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -13043,9 +13043,9 @@ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, + MachineMemOperand::Flags, bool *Fast) const { // Depends what it gets converted into if the type is weird. if (!VT.isSimple()) @@ -13099,11 +13099,14 @@ bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, + MachineMemOperand::MONone, &Fast) && + Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + (allowsMisalignedMemoryAccesses( + MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) && Fast))) { return MVT::f64; } Index: lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.h +++ lib/Target/Hexagon/HexagonISelLowering.h @@ -298,7 +298,7 @@ const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Align, bool *Fast) const override; + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; /// Returns relocation base for the given PIC jumptable. 
   SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3065,8 +3065,9 @@
   return MVT::Other;
 }
 
-bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-      unsigned AS, unsigned Align, bool *Fast) const {
+bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
   if (Fast)
     *Fast = false;
   return Subtarget.isHVXVectorType(VT.getSimpleVT());
Index: lib/Target/Mips/Mips16ISelLowering.h
===================================================================
--- lib/Target/Mips/Mips16ISelLowering.h
+++ lib/Target/Mips/Mips16ISelLowering.h
@@ -23,6 +23,7 @@
 
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                         unsigned Align,
+                                        MachineMemOperand::Flags Flags,
                                         bool *Fast) const override;
 
     MachineBasicBlock *
Index: lib/Target/Mips/Mips16ISelLowering.cpp
===================================================================
--- lib/Target/Mips/Mips16ISelLowering.cpp
+++ lib/Target/Mips/Mips16ISelLowering.cpp
@@ -155,11 +155,8 @@
   return new Mips16TargetLowering(TM, STI);
 }
 
-bool
-Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   return false;
 }
 
Index: lib/Target/Mips/MipsSEISelLowering.h
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.h
+++ lib/Target/Mips/MipsSEISelLowering.h
@@ -40,9 +40,10 @@
     void addMSAFloatType(MVT::SimpleValueType Ty,
                          const TargetRegisterClass *RC);
 
-    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AS = 0, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
Index: lib/Target/Mips/MipsSEISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.cpp
+++ lib/Target/Mips/MipsSEISelLowering.cpp
@@ -419,11 +419,8 @@
                       Op->getOperand(2));
 }
 
-bool
-MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
   if (Subtarget.systemSupportsUnalignedAccess()) {
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2231,7 +2231,7 @@
   LoadSDNode *Load = cast<LoadSDNode>(Op);
   EVT MemVT = Load->getMemoryVT();
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                          Load->getAddressSpace(), Load->getAlignment())) {
+                          *Load->getMemOperand())) {
     SDValue Ops[2];
     std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
     return DAG.getMergeValues(Ops, SDLoc(Op));
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h @@ -846,10 +846,10 @@ /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - bool allowsMisalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - unsigned Align = 1, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -14566,6 +14566,7 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, + MachineMemOperand::Flags, bool *Fast) const { if (DisablePPCUnaligned) return false; Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -409,6 +409,7 @@ Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -761,10 +761,8 @@ return isUInt<32>(Imm) || isUInt<32>(-Imm); } -bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, - bool *Fast) const { +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -60,6 +60,7 @@ unsigned AS, Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *Fast) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -530,7 +530,8 @@ } bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( - EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const { + EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, + MachineMemOperand::Flags /*Flags*/, bool *Fast) const { // WebAssembly supports unaligned accesses, though it should be declared // with the p2align attribute on loads and stores which do so, and there // may be a performance impact. 
We tell LLVM they're "fast" because Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -745,7 +745,8 @@ /// Returns true if the target allows unaligned memory accesses of the /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, - bool *Fast) const override; + MachineMemOperand::Flags Flags, + bool *Fast) const override; /// Provide custom lowering hooks for some operations. /// Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -178,10 +178,10 @@ // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i8 , Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i32, MVT::i16, Expand); - setTruncStoreAction(MVT::i32, MVT::i8 , Expand); - setTruncStoreAction(MVT::i16, MVT::i8, Expand); + setTruncStoreAction(MVT::i32, MVT::i8, Expand); + setTruncStoreAction(MVT::i16, MVT::i8, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -195,96 +195,96 @@ // Integer absolute. if (Subtarget.hasCMov()) { - setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); + setOperationAction(ISD::ABS, MVT::i16, Custom); + setOperationAction(ISD::ABS, MVT::i32, Custom); } - setOperationAction(ISD::ABS , MVT::i64 , Custom); + setOperationAction(ISD::ABS, MVT::i64, Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { - setOperationAction(ShiftOp , MVT::i16 , Custom); - setOperationAction(ShiftOp , MVT::i32 , Custom); + setOperationAction(ShiftOp, MVT::i16, Custom); + setOperationAction(ShiftOp, MVT::i32, Custom); if (Subtarget.is64Bit()) - setOperationAction(ShiftOp , MVT::i64 , Custom); + setOperationAction(ShiftOp, MVT::i64, Custom); } // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. - setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); if (Subtarget.is64Bit()) { if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) // f32/f64 are legal, f80 is custom. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); else - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } else if (!Subtarget.useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD or VCVTUSI2SS/SD for other targets. 
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } else { - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have // this operation. - setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); - setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); if (!Subtarget.useSoftFloat()) { // SSE has no i16 to fp conversion, only i32. if (X86ScalarSSEf32) { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } } else { - setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); - setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); } // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. - setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); - setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); if (!Subtarget.useSoftFloat()) { // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 // are Legal, f80 is custom lowered. - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed // conversion. - setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); if (Subtarget.is64Bit()) { if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. 
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } else { - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); } } else if (!Subtarget.useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. @@ -292,27 +292,27 @@ // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); else // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { - setOperationAction(ISD::BITCAST , MVT::f32 , Expand); - setOperationAction(ISD::BITCAST , MVT::i32 , Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); if (Subtarget.is64Bit()) { - setOperationAction(ISD::BITCAST , MVT::f64 , Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); // Without SSE, i64->f64 goes through memory. - setOperationAction(ISD::BITCAST , MVT::i64 , Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); } } else if (!Subtarget.is64Bit()) - setOperationAction(ISD::BITCAST , MVT::i64 , Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); // Scalar integer divide and remainder are lowered to use operations that // produce two results, to match the available instructions. This exposes @@ -324,7 +324,7 @@ // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. 
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); @@ -333,36 +333,36 @@ setOperationAction(ISD::UREM, VT, Expand); } - setOperationAction(ISD::BR_JT , MVT::Other, Expand); - setOperationAction(ISD::BRCOND , MVT::Other, Custom); - for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, - MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { - setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128, MVT::i8, MVT::i16, + MVT::i32, MVT::i64}) { + setOperationAction(ISD::BR_CC, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); } if (Subtarget.is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); - - setOperationAction(ISD::FREM , MVT::f32 , Expand); - setOperationAction(ISD::FREM , MVT::f64 , Expand); - setOperationAction(ISD::FREM , MVT::f80 , Expand); - setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f80, Expand); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // Promote the i8 variants and force them on up to i32 which has a shorter // encoding. - setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); - setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32); + setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32); if (!Subtarget.hasBMI()) { - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); + setOperationAction(ISD::CTTZ, MVT::i16, Custom); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal); if (Subtarget.is64Bit()) { - setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); } } @@ -370,17 +370,17 @@ if (Subtarget.hasLZCNT()) { // When promoting the i8 variants, force them to i32 for a shorter // encoding. 
- setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); - setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32); + setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32); } else { - setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i32 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); + setOperationAction(ISD::CTLZ, MVT::i8, Custom); + setOperationAction(ISD::CTLZ, MVT::i16, Custom); + setOperationAction(ISD::CTLZ, MVT::i32, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); if (Subtarget.is64Bit()) { - setOperationAction(ISD::CTLZ , MVT::i64 , Custom); + setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); } } @@ -409,39 +409,39 @@ if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (Subtarget.is64Bit()) - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); else - setOperationAction(ISD::CTPOP , MVT::i64 , Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); } - setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); if (!Subtarget.hasMOVBE()) - setOperationAction(ISD::BSWAP , MVT::i16 , Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); // These should be promoted to a larger select which is supported. - setOperationAction(ISD::SELECT , MVT::i1 , Promote); + setOperationAction(ISD::SELECT, MVT::i1, Promote); // X86 wants to expand cmov itself. - for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { + for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) { setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); } - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); } // Custom action for SELECT MMX and expand action for SELECT_CC MMX setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); - setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since // LLVM/Clang supports zero-cost DWARF and SEH exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); @@ -451,19 +451,19 @@ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); // Darwin ABI issue. 
- for (auto VT : { MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; - setOperationAction(ISD::ConstantPool , VT, Custom); - setOperationAction(ISD::JumpTable , VT, Custom); - setOperationAction(ISD::GlobalAddress , VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::GlobalTLSAddress, VT, Custom); - setOperationAction(ISD::ExternalSymbol , VT, Custom); - setOperationAction(ISD::BlockAddress , VT, Custom); + setOperationAction(ISD::ExternalSymbol, VT, Custom); + setOperationAction(ISD::BlockAddress, VT, Custom); } // 64-bit shl, sra, srl (iff 32-bit x86) - for (auto VT : { MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; setOperationAction(ISD::SHL_PARTS, VT, Custom); @@ -472,12 +472,12 @@ } if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) - setOperationAction(ISD::PREFETCH , MVT::Other, Legal); + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); - setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Expand certain atomics - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); @@ -511,14 +511,14 @@ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // VASTART needs to be custom lowered to use the VarArgsFrameIndex - setOperationAction(ISD::VASTART , MVT::Other, Custom); - setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); bool Is64Bit = Subtarget.is64Bit(); - setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); + setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); @@ -540,7 +540,7 @@ // non-optsize case. setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - for (auto VT : { MVT::f32, MVT::f64 }) { + for (auto VT : {MVT::f32, MVT::f64}) { // Use ANDPD to simulate FABS. setOperationAction(ISD::FABS, VT, Custom); @@ -555,8 +555,8 @@ setOperationAction(ISD::FSUB, VT, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , VT, Expand); - setOperationAction(ISD::FCOS , VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); } @@ -572,10 +572,10 @@ addRegisterClass(MVT::f64, &X86::RFP64RegClass); // Use ANDPS to simulate FABS. - setOperationAction(ISD::FABS , MVT::f32, Custom); + setOperationAction(ISD::FABS, MVT::f32, Custom); // Use XORP to simulate FNEG. 
- setOperationAction(ISD::FNEG , MVT::f32, Custom); + setOperationAction(ISD::FNEG, MVT::f32, Custom); if (UseX87) setOperationAction(ISD::UNDEF, MVT::f64, Expand); @@ -586,8 +586,8 @@ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); if (UseX87) { @@ -602,13 +602,13 @@ addRegisterClass(MVT::f64, &X86::RFP64RegClass); addRegisterClass(MVT::f32, &X86::RFP32RegClass); - for (auto VT : { MVT::f32, MVT::f64 }) { - setOperationAction(ISD::UNDEF, VT, Expand); + for (auto VT : {MVT::f32, MVT::f64}) { + setOperationAction(ISD::UNDEF, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); // Always expand sin/cos functions even though x87 has an instruction. - setOperationAction(ISD::FSIN , VT, Expand); - setOperationAction(ISD::FCOS , VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); } } @@ -620,7 +620,7 @@ addLegalFPImmediate(APFloat(+1.0f)); // FLD1 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS - } else // SSE immediates. + } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0f)); // xorps } // Expand FP64 immediates into loads from the stack, save special cases. @@ -630,7 +630,7 @@ addLegalFPImmediate(APFloat(+1.0)); // FLD1 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - } else // SSE immediates. + } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0)); // xorpd } @@ -644,38 +644,38 @@ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); - setOperationAction(ISD::FABS , MVT::f128, Custom); - setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); } addRegisterClass(MVT::f80, &X86::RFP80RegClass); - setOperationAction(ISD::UNDEF, MVT::f80, Expand); + setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); - addLegalFPImmediate(TmpFlt); // FLD0 + addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); - addLegalFPImmediate(TmpFlt); // FLD0/FCHS + addLegalFPImmediate(TmpFlt); // FLD0/FCHS bool ignored; APFloat TmpFlt2(+1.0); - TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, - &ignored); - addLegalFPImmediate(TmpFlt2); // FLD1 + TmpFlt2.convert(APFloat::x87DoubleExtended(), + APFloat::rmNearestTiesToEven, &ignored); + addLegalFPImmediate(TmpFlt2); // FLD1 TmpFlt2.changeSign(); - addLegalFPImmediate(TmpFlt2); // FLD1/FCHS + addLegalFPImmediate(TmpFlt2); // FLD1/FCHS } // Always expand sin/cos functions even though x87 has an instruction. 
- setOperationAction(ISD::FSIN , MVT::f80, Expand); - setOperationAction(ISD::FCOS , MVT::f80, Expand); + setOperationAction(ISD::FSIN, MVT::f80, Expand); + setOperationAction(ISD::FCOS, MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); setOperationAction(ISD::FFLOOR, MVT::f80, Expand); - setOperationAction(ISD::FCEIL, MVT::f80, Expand); + setOperationAction(ISD::FCEIL, MVT::f80, Expand); setOperationAction(ISD::FTRUNC, MVT::f80, Expand); - setOperationAction(ISD::FRINT, MVT::f80, Expand); + setOperationAction(ISD::FRINT, MVT::f80, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); setOperationAction(ISD::LROUND, MVT::f80, Expand); @@ -685,9 +685,9 @@ } // Always use a library call for pow. - setOperationAction(ISD::FPOW , MVT::f32 , Expand); - setOperationAction(ISD::FPOW , MVT::f64 , Expand); - setOperationAction(ISD::FPOW , MVT::f80 , Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f80, Expand); setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); @@ -698,19 +698,19 @@ setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); // Some FP actions are always expanded for vector types. - for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, - MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { - setOperationAction(ISD::FSIN, VT, Expand); - setOperationAction(ISD::FSINCOS, VT, Expand); - setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + for (auto VT : {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v2f64, MVT::v4f64, + MVT::v8f64}) { + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); - setOperationAction(ISD::FPOW, VT, Expand); - setOperationAction(ISD::FLOG, VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FEXP, VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); } // First set operation action for all vector types to either promote @@ -721,11 +721,11 @@ setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); - setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); - setOperationAction(ISD::FMA, VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -748,7 +748,7 @@ setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::SINT_TO_FP, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); + 
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); @@ -785,15 +785,15 @@ addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); - setOperationAction(ISD::FNEG, MVT::v4f32, Custom); - setOperationAction(ISD::FABS, MVT::v4f32, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); + setOperationAction(ISD::FNEG, MVT::v4f32, Custom); + setOperationAction(ISD::FABS, MVT::v4f32, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT, MVT::v4f32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -811,59 +811,59 @@ addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); - for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { + for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, + MVT::v2i32}) { setOperationAction(ISD::SDIV, VT, Custom); setOperationAction(ISD::SREM, VT, Custom); setOperationAction(ISD::UDIV, VT, Custom); setOperationAction(ISD::UREM, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v2i8, Custom); - setOperationAction(ISD::MUL, MVT::v2i16, Custom); - setOperationAction(ISD::MUL, MVT::v2i32, Custom); - setOperationAction(ISD::MUL, MVT::v4i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i16, Custom); - setOperationAction(ISD::MUL, MVT::v8i8, Custom); - - setOperationAction(ISD::MUL, MVT::v16i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i32, Custom); - setOperationAction(ISD::MUL, MVT::v2i64, Custom); - setOperationAction(ISD::MULHU, MVT::v4i32, Custom); - setOperationAction(ISD::MULHS, MVT::v4i32, Custom); - setOperationAction(ISD::MULHU, MVT::v16i8, Custom); - setOperationAction(ISD::MULHS, MVT::v16i8, Custom); - setOperationAction(ISD::MULHU, MVT::v8i16, Legal); - setOperationAction(ISD::MULHS, MVT::v8i16, Legal); - setOperationAction(ISD::MUL, MVT::v8i16, Legal); - setOperationAction(ISD::FNEG, MVT::v2f64, Custom); - setOperationAction(ISD::FABS, MVT::v2f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); + setOperationAction(ISD::MUL, MVT::v2i8, Custom); + setOperationAction(ISD::MUL, MVT::v2i16, Custom); + setOperationAction(ISD::MUL, MVT::v2i32, Custom); + setOperationAction(ISD::MUL, MVT::v4i8, Custom); + setOperationAction(ISD::MUL, MVT::v4i16, Custom); + setOperationAction(ISD::MUL, MVT::v8i8, Custom); + + setOperationAction(ISD::MUL, MVT::v16i8, Custom); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::MULHU, MVT::v4i32, Custom); + setOperationAction(ISD::MULHS, MVT::v4i32, Custom); + setOperationAction(ISD::MULHU, MVT::v16i8, Custom); + setOperationAction(ISD::MULHS, MVT::v16i8, Custom); + 
setOperationAction(ISD::MULHU, MVT::v8i16, Legal); + setOperationAction(ISD::MULHS, MVT::v8i16, Legal); + setOperationAction(ISD::MUL, MVT::v8i16, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + setOperationAction(ISD::FABS, MVT::v2f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); } - setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); - setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); - setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); - setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); - setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); - setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); - setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); - setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); - setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); - setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); - setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); - setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); + setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); + setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); + setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); + setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); if (!ExperimentalVectorWideningLegalization) { // Use widening instead of promotion. - for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16 }) { + for (auto VT : + {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16}) { setOperationAction(ISD::UADDSAT, VT, Custom); setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::USUBSAT, VT, Custom); @@ -871,21 +871,21 @@ } } - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // Provide custom widening for v2f32 setcc. This is really for VLX when // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to // type legalization changing the result type to v4i1 during widening. // It works fine for SSE2 and is probably faster so no need to qualify with // VLX support. 
- setOperationAction(ISD::SETCC, MVT::v2i32, Custom); + setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ABS, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::ABS, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -893,11 +893,11 @@ setCondCodeAction(ISD::SETLE, VT, Custom); } - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) { + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } @@ -914,40 +914,40 @@ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom); } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); + for (auto VT : {MVT::v2f64, MVT::v2i64}) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); if (VT == MVT::v2i64 && !Subtarget.is64Bit()) continue; - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } // Custom lower v2i64 and v2f64 selects. - setOperationAction(ISD::SELECT, MVT::v2f64, Custom); - setOperationAction(ISD::SELECT, MVT::v2i64, Custom); - setOperationAction(ISD::SELECT, MVT::v4i32, Custom); - setOperationAction(ISD::SELECT, MVT::v8i16, Custom); - setOperationAction(ISD::SELECT, MVT::v16i8, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + setOperationAction(ISD::SELECT, MVT::v2f64, Custom); + setOperationAction(ISD::SELECT, MVT::v2i64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT, MVT::v8i16, Custom); + setOperationAction(ISD::SELECT, MVT::v16i8, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); // Custom legalize these to avoid over promotion or custom promotion. 
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is @@ -955,34 +955,34 @@ // be emitted instead of an AssertZExt. This will allow packssdw followed by // packuswb to be used to truncate to v8i8. This is necessary since packusdw // isn't available until sse4.1. - setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion. - setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); // We want to legalize this to an f64 load rather than an i64 load on // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for // store. 
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i16, Custom); - setOperationAction(ISD::LOAD, MVT::v8i8, Custom); - setOperationAction(ISD::STORE, MVT::v2f32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); - setOperationAction(ISD::STORE, MVT::v4i16, Custom); - setOperationAction(ISD::STORE, MVT::v8i8, Custom); - - setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); - setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); - setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); + setOperationAction(ISD::LOAD, MVT::v2f32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + setOperationAction(ISD::LOAD, MVT::v8i8, Custom); + setOperationAction(ISD::STORE, MVT::v2f32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i16, Custom); + setOperationAction(ISD::STORE, MVT::v8i8, Custom); + + setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); + setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); + setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); @@ -993,77 +993,77 @@ if (ExperimentalVectorWideningLegalization) { setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); } else { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom); } // In the customized shift lowering, the legal v4i32/v2i64 cases // in AVX2 will be recognized. - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); } - setOperationAction(ISD::ROTL, MVT::v4i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i16, Custom); + setOperationAction(ISD::ROTL, MVT::v4i32, Custom); + setOperationAction(ISD::ROTL, MVT::v8i16, Custom); // With AVX512, expanding (and promoting the shifts) is better. 
if (!Subtarget.hasAVX512()) - setOperationAction(ISD::ROTL, MVT::v16i8, Custom); + setOperationAction(ISD::ROTL, MVT::v16i8, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { - setOperationAction(ISD::ABS, MVT::v16i8, Legal); - setOperationAction(ISD::ABS, MVT::v8i16, Legal); - setOperationAction(ISD::ABS, MVT::v4i32, Legal); - setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); - setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); - setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); - setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); - setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v4i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); // These might be better off as horizontal vector ops. - setOperationAction(ISD::ADD, MVT::i16, Custom); - setOperationAction(ISD::ADD, MVT::i32, Custom); - setOperationAction(ISD::SUB, MVT::i16, Custom); - setOperationAction(ISD::SUB, MVT::i32, Custom); + setOperationAction(ISD::ADD, MVT::i16, Custom); + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i16, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, RoundedTy, Legal); - setOperationAction(ISD::FCEIL, RoundedTy, Legal); - setOperationAction(ISD::FTRUNC, RoundedTy, Legal); - setOperationAction(ISD::FRINT, RoundedTy, Legal); - setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); + setOperationAction(ISD::FFLOOR, RoundedTy, Legal); + setOperationAction(ISD::FCEIL, RoundedTy, Legal); + setOperationAction(ISD::FTRUNC, RoundedTy, Legal); + setOperationAction(ISD::FRINT, RoundedTy, Legal); + setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); } - setOperationAction(ISD::SMAX, MVT::v16i8, Legal); - setOperationAction(ISD::SMAX, MVT::v4i32, Legal); - setOperationAction(ISD::UMAX, MVT::v8i16, Legal); - setOperationAction(ISD::UMAX, MVT::v4i32, Legal); - setOperationAction(ISD::SMIN, MVT::v16i8, Legal); - setOperationAction(ISD::SMIN, MVT::v4i32, Legal); - setOperationAction(ISD::UMIN, MVT::v8i16, Legal); - setOperationAction(ISD::UMIN, MVT::v4i32, Legal); + setOperationAction(ISD::SMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMAX, MVT::v4i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v4i32, Legal); + setOperationAction(ISD::SMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v4i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v4i32, Legal); // FIXME: Do we need to handle scalar-to-vector here? - setOperationAction(ISD::MUL, MVT::v4i32, Legal); + setOperationAction(ISD::MUL, MVT::v4i32, Legal); // We directly match byte blends in the backend as they match the VSELECT // condition form. - setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. 
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); } @@ -1079,12 +1079,12 @@ } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X - for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { - setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); + for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); if (!ExperimentalVectorWideningLegalization) - setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); @@ -1092,105 +1092,105 @@ // i8 vectors are custom because the source register and source // source memory operand types are not the same width. - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8, + MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::ROTL, VT, Custom); // XOP can efficiently perform BITREVERSE with VPPERM. - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) setOperationAction(ISD::BITREVERSE, VT, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8, + MVT::v16i16, MVT::v8i32, MVT::v4i64}) setOperationAction(ISD::BITREVERSE, VT, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { bool HasInt256 = Subtarget.hasInt256(); - addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); + addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); - addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass - : &X86::VR256RegClass); - - for (auto VT : { MVT::v8f32, MVT::v4f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); + addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? 
&X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + + for (auto VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FCOPYSIGN, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FCOPYSIGN, VT, Custom); } // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); // In the customized shift lowering, the legal v8i32/v4i64 cases // in AVX2 will be recognized. - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); } // These types need custom splitting if their input is a 128-bit vector. - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i32, Custom); - setOperationAction(ISD::ROTL, MVT::v16i16, Custom); + setOperationAction(ISD::ROTL, MVT::v8i32, Custom); + setOperationAction(ISD::ROTL, MVT::v16i16, Custom); // With BWI, expanding (and promoting the shifts) is the better. 
if (!Subtarget.hasBWI()) - setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + + setOperationAction(ISD::SELECT, MVT::v4f64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i64, Custom); + setOperationAction(ISD::SELECT, MVT::v8i32, Custom); + setOperationAction(ISD::SELECT, MVT::v16i16, Custom); + setOperationAction(ISD::SELECT, MVT::v32i8, Custom); + setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + + for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); - setOperationAction(ISD::SELECT, MVT::v4i64, Custom); - setOperationAction(ISD::SELECT, MVT::v8i32, Custom); - setOperationAction(ISD::SELECT, MVT::v16i16, Custom); - setOperationAction(ISD::SELECT, MVT::v32i8, Custom); - setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - - for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SIGN_EXTEND, VT, Custom); - setOperationAction(ISD::ZERO_EXTEND, VT, Custom); - setOperationAction(ISD::ANY_EXTEND, VT, Custom); - } - - setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); - setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); - - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); + + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1199,45 +1199,45 @@ } if (Subtarget.hasAnyFMA()) { - for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, - MVT::v2f64, MVT::v4f64 }) + for (auto VT : + {MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) setOperationAction(ISD::FMA, VT, Legal); } - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); } - setOperationAction(ISD::MUL, MVT::v4i64, Custom); - setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MUL, MVT::v32i8, Custom); - - setOperationAction(ISD::MULHU, MVT::v8i32, Custom); - setOperationAction(ISD::MULHS, MVT::v8i32, Custom); - setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? 
Legal : Custom); - setOperationAction(ISD::MULHU, MVT::v32i8, Custom); - setOperationAction(ISD::MULHS, MVT::v32i8, Custom); - - setOperationAction(ISD::ABS, MVT::v4i64, Custom); - setOperationAction(ISD::SMAX, MVT::v4i64, Custom); - setOperationAction(ISD::UMAX, MVT::v4i64, Custom); - setOperationAction(ISD::SMIN, MVT::v4i64, Custom); - setOperationAction(ISD::UMIN, MVT::v4i64, Custom); - - setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MUL, MVT::v32i8, Custom); + + setOperationAction(ISD::MULHU, MVT::v8i32, Custom); + setOperationAction(ISD::MULHS, MVT::v8i32, Custom); + setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::MULHU, MVT::v32i8, Custom); + setOperationAction(ISD::MULHS, MVT::v32i8, Custom); + + setOperationAction(ISD::ABS, MVT::v4i64, Custom); + setOperationAction(ISD::SMAX, MVT::v4i64, Custom); + setOperationAction(ISD::UMAX, MVT::v4i64, Custom); + setOperationAction(ISD::SMIN, MVT::v4i64, Custom); + setOperationAction(ISD::UMIN, MVT::v4i64, Custom); + + setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { - setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) { + setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, HasInt256 ? 
Legal : Custom); @@ -1255,54 +1255,54 @@ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X - for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) { setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); } } - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { - setOperationAction(ISD::MLOAD, VT, Legal); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, + MVT::v8f32, MVT::v2f64, MVT::v4f64}) { + setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); } // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v4f32, MVT::v2f64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, + MVT::v2f64}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); } // Custom lower several nodes for 256-bit types. - for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, + MVT::v4f64}) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); } if (HasInt256) - setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); if (HasInt256) { // Custom legalize 2x32 to get a little better code. setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) - setOperationAction(ISD::MGATHER, VT, Custom); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) + setOperationAction(ISD::MGATHER, VT, Custom); } } @@ -1310,22 +1310,22 @@ // available with AVX512. 
512-bit vectors are in a separate block controlled // by useAVX512Regs. if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { - addRegisterClass(MVT::v1i1, &X86::VK1RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v8i1, &X86::VK8RegClass); - addRegisterClass(MVT::v16i1, &X86::VK16RegClass); + addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); + addRegisterClass(MVT::v8i1, &X86::VK8RegClass); + addRegisterClass(MVT::v16i1, &X86::VK16RegClass); - setOperationAction(ISD::SELECT, MVT::v1i1, Custom); + setOperationAction(ISD::SELECT, MVT::v1i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); - setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); - setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); - setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); - setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); // There is no byte sized k-register load or store without AVX512DQ. if (!Subtarget.hasDQI()) { @@ -1341,34 +1341,34 @@ } // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. 
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction(ISD::SIGN_EXTEND, VT, Custom); setOperationAction(ISD::ZERO_EXTEND, VT, Custom); - setOperationAction(ISD::ANY_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); } - for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); + for (auto VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1}) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); } - for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) + for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); } @@ -1378,74 +1378,74 @@ if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { addRegisterClass(MVT::v16i32, &X86::VR512RegClass); addRegisterClass(MVT::v16f32, &X86::VR512RegClass); - addRegisterClass(MVT::v8i64, &X86::VR512RegClass); - addRegisterClass(MVT::v8f64, &X86::VR512RegClass); + addRegisterClass(MVT::v8i64, &X86::VR512RegClass); + addRegisterClass(MVT::v8f64, &X86::VR512RegClass); for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { - setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); + setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); - setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); + setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); } - for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FMA, VT, Legal); + for (MVT VT : {MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FMA, VT, Legal); 
setOperationAction(ISD::FCOPYSIGN, VT, Custom); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32); setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); if (!Subtarget.hasVLX()) { // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE // to 512-bit rather than use the AVX2 instructions so that we can use // k-masks. for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { - setOperationAction(ISD::MLOAD, VT, Custom); + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { + setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::MSTORE, VT, Custom); } } - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); if (ExperimentalVectorWideningLegalization) { // Need to custom widen this if we don't have AVX512BW. 
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); } - for (auto VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); + for (auto VT : {MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); } // Without BWI we need to use custom lowering to handle MVT::v64i8 input. @@ -1454,37 +1454,37 @@ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); } - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - - setOperationAction(ISD::MUL, MVT::v8i64, Custom); - setOperationAction(ISD::MUL, MVT::v16i32, Legal); - - setOperationAction(ISD::MULHU, MVT::v16i32, Custom); - setOperationAction(ISD::MULHS, MVT::v16i32, Custom); - - setOperationAction(ISD::SELECT, MVT::v8f64, Custom); - setOperationAction(ISD::SELECT, MVT::v8i64, Custom); - setOperationAction(ISD::SELECT, MVT::v16i32, Custom); - setOperationAction(ISD::SELECT, MVT::v32i16, Custom); - setOperationAction(ISD::SELECT, MVT::v64i8, Custom); - setOperationAction(ISD::SELECT, MVT::v16f32, Custom); - - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); + + setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::MUL, MVT::v16i32, Legal); + + setOperationAction(ISD::MULHU, MVT::v16i32, Custom); + setOperationAction(ISD::MULHS, MVT::v16i32, Custom); + + setOperationAction(ISD::SELECT, MVT::v8f64, Custom); + setOperationAction(ISD::SELECT, MVT::v8i64, Custom); + setOperationAction(ISD::SELECT, MVT::v16i32, Custom); + setOperationAction(ISD::SELECT, MVT::v32i16, Custom); + setOperationAction(ISD::SELECT, MVT::v64i8, Custom); + setOperationAction(ISD::SELECT, MVT::v16f32, Custom); + + for (auto VT : {MVT::v16i32, MVT::v8i64}) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + 
setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1498,54 +1498,54 @@ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::MUL, MVT::v8i64, Legal); + setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - for (auto VT : { MVT::v16i32, MVT::v8i64} ) { - setOperationAction(ISD::CTLZ, VT, Legal); + for (auto VT : {MVT::v16i32, MVT::v8i64}) { + setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { - for (auto VT : { MVT::v16i32, MVT::v8i64 }) + for (auto VT : {MVT::v16i32, MVT::v8i64}) setOperationAction(ISD::CTPOP, VT, Legal); } // Extract subvector is special because the value type // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Legal under AVX1. - for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, + MVT::v4f64}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); - for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); + for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); } // Need to custom split v32i16/v64i8 bitcasts. if (!Subtarget.hasBWI()) { setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); - setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); } if (Subtarget.hasVBMI2()) { - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + for (auto VT : {MVT::v16i32, MVT::v8i64}) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } } - }// has AVX-512 + } // has AVX-512 // This block controls legalization for operations that don't have // pre-AVX512 equivalents. Without VLX we use 512-bit operations for @@ -1555,52 +1555,52 @@ // isel patterns. 
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns? - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { + for (auto VT : {MVT::v2i64, MVT::v4i64}) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); } - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } // Custom legalize 2x32 to get a little better code. setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, + MVT::v8f32, MVT::v2f64, MVT::v4f64}) setOperationAction(ISD::MSCATTER, VT, Custom); if (Subtarget.hasDQI()) { - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, Legal); - setOperationAction(ISD::UINT_TO_FP, VT, Legal); - setOperationAction(ISD::FP_TO_SINT, VT, Legal); - setOperationAction(ISD::FP_TO_UINT, VT, Legal); + for (auto VT : {MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::SINT_TO_FP, VT, Legal); + setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, Legal); + setOperationAction(ISD::FP_TO_UINT, VT, Legal); - setOperationAction(ISD::MUL, VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); } } if (Subtarget.hasCDI()) { - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::CTLZ, VT, Legal); + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) { + setOperationAction(ISD::CTLZ, VT, Legal); } } // Subtarget.hasCDI() if (Subtarget.hasVPOPCNTDQ()) { - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) setOperationAction(ISD::CTPOP, VT, Legal); } } @@ -1609,39 +1609,39 @@ // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with // useBWIRegs. 
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { - addRegisterClass(MVT::v32i1, &X86::VK32RegClass); - addRegisterClass(MVT::v64i1, &X86::VK64RegClass); + addRegisterClass(MVT::v32i1, &X86::VK32RegClass); + addRegisterClass(MVT::v64i1, &X86::VK64RegClass); - for (auto VT : { MVT::v32i1, MVT::v64i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); + for (auto VT : {MVT::v32i1, MVT::v64i1}) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); - for (auto VT : { MVT::v16i1, MVT::v32i1 }) + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + for (auto VT : {MVT::v16i1, MVT::v32i1}) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Extends from v32i1 masks to 256-bit vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); } // This block controls legalization for v32i16 and v64i8. 512-bits can be @@ -1649,62 +1649,62 @@ // attributes. if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); - addRegisterClass(MVT::v64i8, &X86::VR512RegClass); + addRegisterClass(MVT::v64i8, &X86::VR512RegClass); // Extends from v64i1 masks to 512-bit vectors. 
- setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); - - setOperationAction(ISD::MUL, MVT::v32i16, Legal); - setOperationAction(ISD::MUL, MVT::v64i8, Custom); - setOperationAction(ISD::MULHS, MVT::v32i16, Legal); - setOperationAction(ISD::MULHU, MVT::v32i16, Legal); - setOperationAction(ISD::MULHS, MVT::v64i8, Custom); - setOperationAction(ISD::MULHU, MVT::v64i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); + setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); + + setOperationAction(ISD::MUL, MVT::v32i16, Legal); + setOperationAction(ISD::MUL, MVT::v64i8, Custom); + setOperationAction(ISD::MULHS, MVT::v32i16, Legal); + setOperationAction(ISD::MULHU, MVT::v32i16, Legal); + setOperationAction(ISD::MULHS, MVT::v64i8, Custom); + setOperationAction(ISD::MULHU, MVT::v64i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); - setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); - setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); + setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { + for (auto VT : {MVT::v64i8, MVT::v32i16}) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, 
Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::UADDSAT, VT, Legal); - setOperationAction(ISD::SADDSAT, VT, Legal); - setOperationAction(ISD::USUBSAT, VT, Legal); - setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. @@ -1717,7 +1717,7 @@ } if (Subtarget.hasBITALG()) { - for (auto VT : { MVT::v64i8, MVT::v32i16 }) + for (auto VT : {MVT::v64i8, MVT::v32i16}) setOperationAction(ISD::CTPOP, VT, Legal); } @@ -1728,8 +1728,8 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { - for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { - setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); + for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16}) { + setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); } @@ -1738,44 +1738,44 @@ // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? if (Subtarget.hasBITALG()) { - for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) + for (auto VT : {MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16}) setOperationAction(ISD::CTPOP, VT, Legal); } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); if (Subtarget.hasDQI()) { // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. // v2f32 UINT_TO_FP is already custom under SSE2. 
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && "Unexpected operation action!"); // v2i64 FP_TO_S/UINT(v2f32) custom conversion. - setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); } if (Subtarget.hasBWI()) { - setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); } if (Subtarget.hasVBMI2()) { // TODO: Make these legal even without VLX? - for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, + MVT::v8i32, MVT::v4i64}) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } @@ -1796,7 +1796,7 @@ // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; // Add/Sub/Mul with overflow operations are custom lowered. @@ -1942,8 +1942,7 @@ return TypeSplitVector; if (ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() != 1 && - VT.getVectorElementType() != MVT::i1) + VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); @@ -1966,8 +1965,7 @@ } EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, - LLVMContext& Context, - EVT VT) const { + LLVMContext &Context, EVT VT) const { if (!VT.isVector()) return MVT::i8; @@ -2106,11 +2104,10 @@ return true; } -bool -X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, - bool *Fast) const { +bool X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, + unsigned, + MachineMemOperand::Flags, + bool *Fast) const { if (Fast) { switch (VT.getSizeInBits()) { default: @@ -2123,7 +2120,7 @@ case 256: *Fast = !Subtarget.isUnalignedMem32Slow(); break; - // TODO: What about AVX-512 (512-bit) accesses? + // TODO: What about AVX-512 (512-bit) accesses? } } // Misaligned accesses of any size are always allowed. @@ -2175,15 +2172,14 @@ } } -const MCExpr * -X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid,MCContext &Ctx) const{ +const MCExpr *X86TargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned uid, MCContext &Ctx) const { assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. - return MCSymbolRefExpr::create(MBB->getSymbol(), - MCSymbolRefExpr::VK_GOTOFF, Ctx); + return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, + Ctx); } /// Returns relocation base for the given PIC jumptable. 
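Note for context on the X86TargetLowering::allowsMisalignedMemoryAccesses hunk in this file: the override now also receives the MachineMemOperand flags of the access being queried, even though X86 currently ignores them. A minimal sketch of how an out-of-tree target might use the extra parameter is shown below; the target name, the volatile policy, and the size threshold are illustrative assumptions, not taken from this patch:

bool MyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align,
    MachineMemOperand::Flags Flags, bool *Fast) const {
  // Hypothetical policy: reject misaligned volatile accesses, allow the rest.
  if (Flags & MachineMemOperand::MOVolatile)
    return false;
  if (Fast)
    *Fast = VT.getStoreSize() <= 16; // assumed cost model, not X86's
  return true;
}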
@@ -2199,9 +2195,8 @@ /// This returns the relocation base for the given PIC jumptable, /// the same as getPICJumpTableRelocBase, but as an MCExpr. -const MCExpr *X86TargetLowering:: -getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, - MCContext &Ctx) const { +const MCExpr *X86TargetLowering::getPICJumpTableRelocBaseExpr( + const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { // X86-64 uses RIP relative addressing based on the jump table label. if (Subtarget.isPICStyleRIPRel()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); @@ -2218,19 +2213,35 @@ switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); - case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; break; case MVT::x86mmx: RRC = &X86::VR64RegClass; break; - case MVT::f32: case MVT::f64: - case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: - case MVT::v4f32: case MVT::v2f64: - case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: - case MVT::v8f32: case MVT::v4f64: - case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - case MVT::v16f32: case MVT::v8f64: + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + case MVT::v32i8: + case MVT::v16i16: + case MVT::v8i32: + case MVT::v4i64: + case MVT::v8f32: + case MVT::v4f64: + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + case MVT::v16f32: + case MVT::v8f64: RRC = &X86::VR128XRegClass; break; } @@ -2248,8 +2259,8 @@ (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); } -static Constant* SegmentOffset(IRBuilder<> &IRB, - unsigned Offset, unsigned AddressSpace) { +static Constant *SegmentOffset(IRBuilder<> &IRB, unsigned Offset, + unsigned AddressSpace) { return ConstantExpr::getIntToPtr( ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); @@ -2359,7 +2370,7 @@ } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { - static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; + static const MCPhysReg ScratchRegs[] = {X86::R11, 0}; return ScratchRegs; } @@ -2447,8 +2458,8 @@ SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, - MVT::i32)); + RetOps.push_back( + DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, MVT::i32)); // Copy the result values into the output registers. for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; @@ -2473,8 +2484,7 @@ ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); - } - else if (VA.getLocInfo() == CCValAssign::BCvt) + } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && @@ -2498,8 +2508,7 @@ // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. 
- if (VA.getLocReg() == X86::FP0 || - VA.getLocReg() == X86::FP1) { + if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) { // If this is a copy from an xmm register to ST(0), use an FPExtend to // change the value to the FP stack register class. if (isScalarFPTypeInSSEReg(VA.getValVT())) @@ -2515,8 +2524,8 @@ if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); - ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, - ValToCopy); + ValToCopy = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget.hasSSE2()) @@ -2587,9 +2596,9 @@ SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, getPointerTy(MF.getDataLayout())); - unsigned RetValReg - = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? - X86::RAX : X86::EAX; + unsigned RetValReg = + (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? X86::RAX + : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); @@ -2614,7 +2623,7 @@ } } - RetOps[0] = Chain; // Update chain. + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) @@ -2635,7 +2644,8 @@ if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. - if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == + MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) @@ -2651,7 +2661,7 @@ if (UI->getNumOperands() > 4) return false; if (UI->getNumOperands() == 4 && - UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) + UI->getOperand(UI->getNumOperands() - 1).getValueType() != MVT::Glue) return false; HasRet = true; } @@ -2721,10 +2731,10 @@ // When a physical register is available read the value from it and glue // the reads together. ArgValueLo = - DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); + DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueLo.getValue(2); ArgValueHi = - DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); + DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); *InFlag = ArgValueHi.getValue(2); } @@ -2869,13 +2879,9 @@ /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -enum StructReturnType { - NotStructReturn, - RegStructReturn, - StackStructReturn -}; -static StructReturnType -callIsStructReturn(ArrayRef Outs, bool IsMCU) { +enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn }; +static StructReturnType callIsStructReturn(ArrayRef Outs, + bool IsMCU) { if (Outs.empty()) return NotStructReturn; @@ -2888,8 +2894,8 @@ } /// Determines whether a function uses struct return semantics. 
-static StructReturnType -argsAreStructReturn(ArrayRef Ins, bool IsMCU) { +static StructReturnType argsAreStructReturn(ArrayRef Ins, + bool IsMCU) { if (Ins.empty()) return NotStructReturn; @@ -2910,9 +2916,9 @@ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*isVolatile*/false, /*AlwaysInline=*/true, - /*isTailCall*/false, - MachinePointerInfo(), MachinePointerInfo()); + /*isVolatile*/ false, /*AlwaysInline=*/true, + /*isTailCall*/ false, MachinePointerInfo(), + MachinePointerInfo()); } /// Return true if the calling convention is one that we can guarantee TCO for. @@ -2994,7 +3000,8 @@ // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { unsigned Bytes = Flags.getByValSize(); - if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + if (Bytes == 0) + Bytes = 1; // Don't create zero-sized stack objects. // FIXME: For now, all byval parameter objects are marked as aliasing. This // can be improved with deeper analysis. @@ -3075,16 +3082,16 @@ assert(Subtarget.is64Bit()); if (Subtarget.isCallingConvWin64(CallConv)) { - static const MCPhysReg GPR64ArgRegsWin64[] = { - X86::RCX, X86::RDX, X86::R8, X86::R9 - }; - return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); + static const MCPhysReg GPR64ArgRegsWin64[] = {X86::RCX, X86::RDX, X86::R8, + X86::R9}; + return makeArrayRef(std::begin(GPR64ArgRegsWin64), + std::end(GPR64ArgRegsWin64)); } - static const MCPhysReg GPR64ArgRegs64Bit[] = { - X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 - }; - return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); + static const MCPhysReg GPR64ArgRegs64Bit[] = {X86::RDI, X86::RSI, X86::RDX, + X86::RCX, X86::R8, X86::R9}; + return makeArrayRef(std::begin(GPR64ArgRegs64Bit), + std::end(GPR64ArgRegs64Bit)); } // FIXME: Get this from tablegen. @@ -3110,10 +3117,9 @@ // registers. return None; - static const MCPhysReg XMMArgRegs64Bit[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; + static const MCPhysReg XMMArgRegs64Bit[] = {X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}; return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } @@ -3327,8 +3333,7 @@ SDValue ALVal; for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); - LiveGPRs.push_back( - DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); + LiveGPRs.push_back(DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); } if (!ArgXMMs.empty()) { unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); @@ -3381,10 +3386,10 @@ SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getRegSaveFrameIndex(), dl)); - SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getVarArgsFPOffset(), dl)); + SaveXMMOps.push_back( + DAG.getIntPtrConstant(FuncInfo->getRegSaveFrameIndex(), dl)); + SaveXMMOps.push_back( + DAG.getIntPtrConstant(FuncInfo->getVarArgsFPOffset(), dl)); SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), LiveXMMRegs.end()); MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, @@ -3528,11 +3533,11 @@ EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl) { // Store the return address to the appropriate stack slot. 
-  if (!FPDiff) return Chain;
+  if (!FPDiff)
+    return Chain;
   // Calculate the new stack slot for the return address.
-  int NewReturnAddrFI =
-    MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
-                                        false);
+  int NewReturnAddrFI = MF.getFrameInfo().CreateFixedObject(
+      SlotSize, (int64_t)FPDiff - SlotSize, false);
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                        MachinePointerInfo::getFixedStack(
@@ -3552,25 +3557,24 @@
   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
 }
-SDValue
-X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                             SmallVectorImpl<SDValue> &InVals) const {
-  SelectionDAG &DAG = CLI.DAG;
-  SDLoc &dl = CLI.DL;
+SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                     SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &dl = CLI.DL;
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
-  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
-  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
-  SDValue Chain = CLI.Chain;
-  SDValue Callee = CLI.Callee;
-  CallingConv::ID CallConv = CLI.CallConv;
-  bool &isTailCall = CLI.IsTailCall;
-  bool isVarArg = CLI.IsVarArg;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool &isTailCall = CLI.IsTailCall;
+  bool isVarArg = CLI.IsVarArg;
   MachineFunction &MF = DAG.getMachineFunction();
-  bool Is64Bit = Subtarget.is64Bit();
-  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+  bool Is64Bit = Subtarget.is64Bit();
+  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
   StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
-  bool IsSibcall = false;
+  bool IsSibcall = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
   const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
@@ -3610,10 +3614,10 @@
     isTailCall = true;
   } else if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, SR != NotStructReturn,
-                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
-                    Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, SR != NotStructReturn,
+        MF.getFunction().hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins,
+        DAG);
     // Sibcalls are automatically detected tailcalls which do not require
     // ABI changes.
@@ -3719,8 +3723,10 @@
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full: break;
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
     case CCValAssign::SExt:
       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
       break;
@@ -3783,10 +3789,18 @@
         // shadow reg if callee is a varargs function.
unsigned ShadowReg = 0; switch (VA.getLocReg()) { - case X86::XMM0: ShadowReg = X86::RCX; break; - case X86::XMM1: ShadowReg = X86::RDX; break; - case X86::XMM2: ShadowReg = X86::R8; break; - case X86::XMM3: ShadowReg = X86::R9; break; + case X86::XMM0: + ShadowReg = X86::RCX; + break; + case X86::XMM1: + ShadowReg = X86::RDX; + break; + case X86::XMM2: + ShadowReg = X86::R8; + break; + case X86::XMM3: + ShadowReg = X86::R9; + break; } if (ShadowReg) RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); @@ -3796,8 +3810,8 @@ if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, - dl, DAG, VA, Flags)); + MemOpChains.push_back( + LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } @@ -3840,17 +3854,15 @@ // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. - static const MCPhysReg XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; + static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); - assert((Subtarget.hasSSE1() || !NumXMMRegs) - && "SSE registers cannot be used when SSE is disabled"); + assert((Subtarget.hasSSE1() || !NumXMMRegs) && + "SSE registers cannot be used when SSE is disabled"); - RegsToPass.push_back(std::make_pair(unsigned(X86::AL), - DAG.getConstant(NumXMMRegs, dl, - MVT::i8))); + RegsToPass.push_back(std::make_pair( + unsigned(X86::AL), DAG.getConstant(NumXMMRegs, dl, MVT::i8))); } if (isVarArg && IsMustTail) { @@ -3899,8 +3911,8 @@ if (Flags.isInAlloca()) continue; // Create frame index. - int32_t Offset = VA.getLocMemOffset()+FPDiff; - uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + uint32_t OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8; FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); @@ -3913,9 +3925,8 @@ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, Source); - MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, - ArgChain, - Flags, DAG, dl)); + MemOpChains2.push_back( + CreateCopyOfByValArgument(Source, FIN, ArgChain, Flags, DAG, dl)); } else { // Store relative to framepointer. MemOpChains2.push_back(DAG.getStore( @@ -4059,17 +4070,16 @@ unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) - NumBytesForCalleeToPop = NumBytes; // Callee pops everything + NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !canGuaranteeTCO(CallConv) && - !Subtarget.getTargetTriple().isOSMSVCRT() && - SR == StackStructReturn) + !Subtarget.getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. // For MSVC Win32 targets, the caller pops the hidden struct pointer. NumBytesForCalleeToPop = 4; else - NumBytesForCalleeToPop = 0; // Callee pops nothing. + NumBytesForCalleeToPop = 0; // Callee pops nothing. 
if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) { // No need to reset the stack after the call if the call doesn't return. To @@ -4079,11 +4089,9 @@ // Returns a flag for retval copy to use. if (!IsSibcall) { - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytesToPop, dl, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, - true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END( + Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -4128,30 +4136,31 @@ /// requirement. unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; unsigned SlotSize = RegInfo->getSlotSize(); - if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { + if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); } else { // Mask out lower bits, add stackalignment once plus the 12 bytes. - Offset = ((~AlignMask) & Offset) + StackAlignment + - (StackAlignment-SlotSize); + Offset = + ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize); } return Offset; } /// Return true if the given stack call argument is already available in the /// same position (relatively) of the caller's incoming argument stack. -static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const X86InstrInfo *TII, const CCValAssign &VA) { +static bool MatchingStackOffset(SDValue Arg, unsigned Offset, + ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, + const MachineRegisterInfo *MRI, + const X86InstrInfo *TII, + const CCValAssign &VA) { unsigned Bytes = Arg.getValueSizeInBits() / 8; for (;;) { @@ -4332,8 +4341,8 @@ } // Check that the call results are passed in the same way. - if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, - RetCC_X86, RetCC_X86)) + if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86, + RetCC_X86)) return false; // The callee has to preserve all registers the caller needs to preserve. const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -4374,8 +4383,8 @@ if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII, VA)) + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, + TII, VA)) return false; } } @@ -4401,8 +4410,11 @@ continue; unsigned Reg = VA.getLocReg(); switch (Reg) { - default: break; - case X86::EAX: case X86::EDX: case X86::ECX: + default: + break; + case X86::EAX: + case X86::EDX: + case X86::ECX: if (++NumInRegs == MaxInRegs) return false; break; @@ -4460,8 +4472,9 @@ } static bool isTargetShuffle(unsigned Opcode) { - switch(Opcode) { - default: return false; + switch (Opcode) { + default: + return false; case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -4500,7 +4513,8 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { switch (Opcode) { - default: return false; + default: + return false; // Target Shuffles. 
case X86ISD::PSHUFB: case X86ISD::VPERMILPV: @@ -4526,9 +4540,8 @@ if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); - ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, - -(int64_t)SlotSize, - false); + ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject( + SlotSize, -(int64_t)SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -4553,7 +4566,7 @@ // For small code model we assume that latest object is 16MB before end of 31 // bits boundary. We may also accept pretty large negative constants knowing // that all objects are in the positive half of address space. - if (M == CodeModel::Small && Offset < 16*1024*1024) + if (M == CodeModel::Small && Offset < 16 * 1024 * 1024) return true; // For kernel code model we know that all object resist in the negative half @@ -4567,8 +4580,8 @@ /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. -bool X86::isCalleePop(CallingConv::ID CallingConv, - bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { +bool X86::isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, + bool GuaranteeTCO) { // If GuaranteeTCO is true, we force some calls to be callee pop so that we // can guarantee TCO. if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) @@ -4607,17 +4620,28 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; + default: + llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: + return X86::COND_E; + case ISD::SETGT: + return X86::COND_G; + case ISD::SETGE: + return X86::COND_GE; + case ISD::SETLT: + return X86::COND_L; + case ISD::SETLE: + return X86::COND_LE; + case ISD::SETNE: + return X86::COND_NE; + case ISD::SETULT: + return X86::COND_B; + case ISD::SETUGT: + return X86::COND_A; + case ISD::SETULE: + return X86::COND_BE; + case ISD::SETUGE: + return X86::COND_AE; } } @@ -4625,8 +4649,8 @@ /// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, - bool isFP, SDValue &LHS, SDValue &RHS, - SelectionDAG &DAG) { + bool isFP, SDValue &LHS, SDValue &RHS, + SelectionDAG &DAG) { if (!isFP) { if (ConstantSDNode *RHSC = dyn_cast(RHS)) { if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { @@ -4651,14 +4675,14 @@ // First determine if it is required or is profitable to flip the operands. // If LHS is a foldable load, but RHS is not, flip the condition. 
- if (ISD::isNON_EXTLoad(LHS.getNode()) && - !ISD::isNON_EXTLoad(RHS.getNode())) { + if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) { SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); std::swap(LHS, RHS); } switch (SetCCOpcode) { - default: break; + default: + break; case ISD::SETOLT: case ISD::SETOLE: case ISD::SETUGT: @@ -4674,27 +4698,37 @@ // 1 | 0 | 0 | X == Y // 1 | 1 | 1 | unordered switch (SetCCOpcode) { - default: llvm_unreachable("Condcode should be pre-legalized away"); + default: + llvm_unreachable("Condcode should be pre-legalized away"); case ISD::SETUEQ: - case ISD::SETEQ: return X86::COND_E; - case ISD::SETOLT: // flipped + case ISD::SETEQ: + return X86::COND_E; + case ISD::SETOLT: // flipped case ISD::SETOGT: - case ISD::SETGT: return X86::COND_A; - case ISD::SETOLE: // flipped + case ISD::SETGT: + return X86::COND_A; + case ISD::SETOLE: // flipped case ISD::SETOGE: - case ISD::SETGE: return X86::COND_AE; - case ISD::SETUGT: // flipped + case ISD::SETGE: + return X86::COND_AE; + case ISD::SETUGT: // flipped case ISD::SETULT: - case ISD::SETLT: return X86::COND_B; - case ISD::SETUGE: // flipped + case ISD::SETLT: + return X86::COND_B; + case ISD::SETUGE: // flipped case ISD::SETULE: - case ISD::SETLE: return X86::COND_BE; + case ISD::SETLE: + return X86::COND_BE; case ISD::SETONE: - case ISD::SETNE: return X86::COND_NE; - case ISD::SETUO: return X86::COND_P; - case ISD::SETO: return X86::COND_NP; + case ISD::SETNE: + return X86::COND_NE; + case ISD::SETUO: + return X86::COND_P; + case ISD::SETO: + return X86::COND_NP; case ISD::SETOEQ: - case ISD::SETUNE: return X86::COND_INVALID; + case ISD::SETUNE: + return X86::COND_INVALID; } } @@ -4717,13 +4751,12 @@ } } - bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { - const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); + const IntrinsicData *IntrData = getIntrinsicWithChain(Intrinsic); if (!IntrData) return false; @@ -4736,7 +4769,7 @@ case TRUNCATE_TO_MEM_VI32: { Info.opc = ISD::INTRINSIC_VOID; Info.ptrVal = I.getArgOperand(0); - MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); + MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE; if (IntrData->Type == TRUNCATE_TO_MEM_VI8) ScalarVT = MVT::i8; @@ -4756,8 +4789,8 @@ Info.ptrVal = nullptr; MVT DataVT = MVT::getVT(I.getType()); MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); - unsigned NumElts = std::min(DataVT.getVectorNumElements(), - IndexVT.getVectorNumElements()); + unsigned NumElts = + std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); Info.align = 1; Info.flags |= MachineMemOperand::MOLoad; @@ -4768,8 +4801,8 @@ Info.ptrVal = nullptr; MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType()); MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); - unsigned NumElts = std::min(DataVT.getVectorNumElements(), - IndexVT.getVectorNumElements()); + unsigned NumElts = + std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); Info.align = 1; Info.flags |= MachineMemOperand::MOStore; @@ -4868,8 +4901,9 @@ // Mask vectors support all subregister combinations and operations that // extract half of vector. 
   if (ResVT.getVectorElementType() == MVT::i1)
-    return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
-          (Index == ResVT.getVectorNumElements()));
+    return Index == 0 ||
+           ((ResVT.getSizeInBits() == SrcVT.getSizeInBits() * 2) &&
+            (Index == ResVT.getVectorNumElements()));
   return (Index % ResVT.getVectorNumElements()) == 0;
 }
@@ -4933,9 +4967,7 @@
   return true;
 }
-bool X86TargetLowering::isCtlzFast() const {
-  return Subtarget.hasFastLZCNT();
-}
+bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); }
 bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
@@ -5086,8 +5118,7 @@
 /// Return true if every element in Mask is undef or if its value
 /// falls within the specified range (L, H].
-static bool isUndefOrInRange(ArrayRef<int> Mask,
-                             int Low, int Hi) {
+static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
   for (int M : Mask)
     if (!isUndefOrInRange(M, Low, Hi))
       return false;
@@ -5200,8 +5231,7 @@
   return true;
 }
-static bool canWidenShuffleElements(ArrayRef<int> Mask,
-                                    const APInt &Zeroable,
+static bool canWidenShuffleElements(ArrayRef<int> Mask, const APInt &Zeroable,
                                     SmallVectorImpl<int> &WidenedMask) {
   SmallVector<int, 64> TargetMask(Mask.begin(), Mask.end());
   for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
@@ -5229,7 +5259,7 @@
 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
                               const SDLoc &dl, bool IsMask = false) {
-  SmallVector<SDValue, 32> Ops;
+  SmallVector<SDValue, 32> Ops;
   bool Split = false;
   MVT ConstVecVT = VT;
@@ -5243,12 +5273,12 @@
   MVT EltVT = ConstVecVT.getVectorElementType();
   for (unsigned i = 0; i < NumElts; ++i) {
     bool IsUndef = Values[i] < 0 && IsMask;
-    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
-      DAG.getConstant(Values[i], dl, EltVT);
+    SDValue OpNode =
+        IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(Values[i], dl, EltVT);
     Ops.push_back(OpNode);
     if (Split)
-      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
-                    DAG.getConstant(0, dl, EltVT));
+      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT)
+                            : DAG.getConstant(0, dl, EltVT));
   }
   SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
   if (Split)
@@ -5256,8 +5286,8 @@
   return ConstsNode;
 }
-static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
-                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs, MVT VT,
+                              SelectionDAG &DAG, const SDLoc &dl) {
   assert(Bits.size() == Undefs.getBitWidth() &&
          "Unequal constant and undef arrays");
   SmallVector<SDValue, 32> Ops;
@@ -5325,9 +5355,9 @@
                                    const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
   EVT ElVT = VT.getVectorElementType();
-  unsigned Factor = VT.getSizeInBits()/vectorWidth;
+  unsigned Factor = VT.getSizeInBits() / vectorWidth;
   EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
-                                  VT.getVectorNumElements()/Factor);
+                                  VT.getVectorNumElements() / Factor);
   // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
@@ -5355,7 +5385,7 @@
 static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
                                    SelectionDAG &DAG, const SDLoc &dl) {
   assert((Vec.getValueType().is256BitVector() ||
-          Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+          Vec.getValueType().is512BitVector()) &&
+         "Unexpected vector size!");
   return extractSubVector(Vec, IdxVal, DAG, dl, 128);
 }
@@ -5379,7 +5410,7 @@
   EVT ResultVT = Result.getValueType();
   // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk @@ -5545,8 +5576,7 @@ if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { // May need to promote to a legal type. Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - SubVec, Idx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), SubVec, Idx); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } @@ -5562,20 +5592,19 @@ if (IdxVal == 0) { // Zero lower bits of the Vec SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, - ZeroIdx); + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); // Merge them together, SubVec should be zero extended. SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - SubVec, ZeroIdx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), SubVec, + ZeroIdx); Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } - SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - Undef, SubVec, ZeroIdx); + SubVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, SubVec, ZeroIdx); if (Vec.isUndef()) { assert(IdxVal != 0 && "Unexpected index"); @@ -5606,12 +5635,12 @@ // isel to opimitize when bits are known zero. Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - Vec, ZeroIdx); + getZeroVector(WideOpVT, Subtarget, DAG, dl), Vec, + ZeroIdx); } else { // Otherwise use explicit shifts to zero the bits. - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - Undef, Vec, ZeroIdx); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); NumElems = WideOpVT.getVectorNumElements(); SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); @@ -5639,7 +5668,7 @@ // Shift to the final position, filling upper bits with 0. unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getConstant(ShiftRight, dl, MVT::i8)); // Xor with original vector leaving the new value. Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); // Reduce to original width if needed. @@ -5683,8 +5712,8 @@ } if (VT.getVectorNumElements() == InVT.getVectorNumElements()) - return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, VT, In); + return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, + In); return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND_VECTOR_INREG, @@ -5711,13 +5740,12 @@ /// This produces a shuffle where the low element of V2 is swizzled into the /// zero/undef vector, landing at element Idx. /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 
-static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, - bool IsZero, +static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = V2.getSimpleValueType(); - SDValue V1 = IsZero - ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); + SDValue V1 = + IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); int NumElems = VT.getVectorNumElements(); SmallVector MaskVec(NumElems); for (int i = 0; i != NumElems; ++i) @@ -6434,7 +6462,8 @@ } return false; } - default: llvm_unreachable("unknown target shuffle node"); + default: + llvm_unreachable("unknown target shuffle node"); } // Empty mask indicates the decode failed. @@ -6469,8 +6498,7 @@ /// SM_SentinelZero - this is for elements that are known to be zero /// (not just zeroable) from their inputs. /// Returns true if the target shuffle mask was decoded. -static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask, +static bool setTargetShuffleZeroElements(SDValue N, SmallVectorImpl &Mask, SmallVectorImpl &Ops) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) @@ -6656,10 +6684,11 @@ int InsertIdx = N.getConstantOperandVal(2); if (SubMask.size() != NumSubElts) { assert(((SubMask.size() % NumSubElts) == 0 || - (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale"); + (NumSubElts % SubMask.size()) == 0) && + "Illegal submask scale"); if ((NumSubElts % SubMask.size()) == 0) { int Scale = NumSubElts / SubMask.size(); - SmallVector ScaledSubMask; + SmallVector ScaledSubMask; scaleShuffleMask(Scale, SubMask, ScaledSubMask); SubMask = ScaledSubMask; } else { @@ -6877,7 +6906,8 @@ return false; } -/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly. +/// Removes unused shuffle source inputs and adjusts the shuffle mask +/// accordingly. static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, SmallVectorImpl &Mask) { int MaskWidth = Mask.size(); @@ -6926,7 +6956,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) - return SDValue(); // Limit search depth. + return SDValue(); // Limit search depth. SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); @@ -6940,9 +6970,9 @@ return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); - SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) - : SV->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); + SDValue NewV = + (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth + 1); } // Recurse into target specific vector shuffles to find scalars. @@ -6954,7 +6984,8 @@ SmallVector ShuffleOps; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, + IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; @@ -6964,10 +6995,9 @@ if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufSVT); - assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); + assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range"); SDValue NewV = (Elt < NumElems) ? 
ShuffleOps[0] : ShuffleOps[1]; - return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, - Depth+1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth + 1); } // Recurse into insert_subvector base/sub vector to find scalars. @@ -7148,8 +7178,8 @@ MVT EltVT = VT.getVectorElementType(); // Create a new build vector with the first 2 elements followed by undef // padding, bitcast to v2f64, duplicate, and bitcast back. - SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), - DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), + DAG.getUNDEF(EltVT)}; SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); return DAG.getBitcast(VT, Dup); @@ -7197,7 +7227,7 @@ for (EltIdx = 0; EltIdx < 4; ++EltIdx) { if (Zeroable[EltIdx]) { // The zero vector will be on the right hand side. - Mask[EltIdx] = EltIdx+4; + Mask[EltIdx] = EltIdx + 4; continue; } @@ -7268,7 +7298,7 @@ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); + SDValue ShiftVal = DAG.getConstant(NumBits / 8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -7302,7 +7332,7 @@ // FIXME: 256-bit vector instructions don't require a strict alignment, // improve this code to support it better. - unsigned RequiredAlign = VT.getSizeInBits()/8; + unsigned RequiredAlign = VT.getSizeInBits() / 8; SDValue Chain = LD->getChain(); // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -7469,7 +7499,8 @@ // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. - if (!isAfterLegalize && VT.isVector() && NumElems == VT.getVectorNumElements()) { + if (!isAfterLegalize && VT.isVector() && + NumElems == VT.getVectorNumElements()) { SmallVector ClearMask(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { if (ZeroMask[i]) @@ -7511,12 +7542,10 @@ MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize); if (TLI.isTypeLegal(VecVT)) { SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); - SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, - LDBase->getPointerInfo(), - LDBase->getAlignment(), - MachineMemOperand::MOLoad); + SDValue Ops[] = {LDBase->getChain(), LDBase->getBasePtr()}; + SDValue ResNode = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), + LDBase->getAlignment(), MachineMemOperand::MOLoad); for (auto *LD : Loads) DAG.makeEquivalentMemoryOrdering(LD, ResNode); return DAG.getBitcast(VT, ResNode); @@ -7719,7 +7748,7 @@ else BOperand = Ld.getOperand(0).getOperand(0); MVT MaskVT = BOperand.getSimpleValueType(); - if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q + if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d SDValue Brdcst = DAG.getNode(X86ISD::VBROADCASTM, dl, @@ -7793,8 +7822,8 @@ } else if (SplatBitSize > 64) { // Load the vector of constants and broadcast it. 
MVT CVT = VT.getScalarType(); - Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, - *Ctx); + Constant *VecC = + getConstantVector(VT, SplatValue, SplatBitSize, *Ctx); SDValue VCP = DAG.getConstantPool(VecC, PVT); unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); unsigned Alignment = cast(VCP)->getAlignment(); @@ -8055,7 +8084,7 @@ return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl)); } // Vector has one or more non-const elements @@ -8092,8 +8121,7 @@ if (Immediate) { MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); Imm = DAG.getConstant(Immediate, dl, ImmVT); - } - else if (HasConstElts) + } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); else Imm = DAG.getUNDEF(VT); @@ -8135,9 +8163,8 @@ /// See the corrected implementation in isHopBuildVector(). Can we reduce this /// code because it is only used for partial h-op matching now? static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode, - SelectionDAG &DAG, - unsigned BaseIdx, unsigned LastIdx, - SDValue &V0, SDValue &V1) { + SelectionDAG &DAG, unsigned BaseIdx, + unsigned LastIdx, SDValue &V0, SDValue &V1) { EVT VT = N->getValueType(0); assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops"); assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); @@ -8175,10 +8202,10 @@ // Try to match the following pattern: // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1)) CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - Op0.getOperand(0) == Op1.getOperand(0) && - isa(Op0.getOperand(1)) && - isa(Op1.getOperand(1))); + Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0) == Op1.getOperand(0) && + isa(Op0.getOperand(1)) && + isa(Op1.getOperand(1))); if (!CanFold) break; @@ -8258,9 +8285,9 @@ unsigned NumElts = VT.getVectorNumElements(); SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL); - SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL); + SDValue V0_HI = extract128BitVector(V0, NumElts / 2, DAG, DL); SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL); - SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL); + SDValue V1_HI = extract128BitVector(V1, NumElts / 2, DAG, DL); MVT NewVT = V0_LO.getSimpleValueType(); SDValue LO = DAG.getUNDEF(NewVT); @@ -8292,8 +8319,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, - unsigned &NumExtracts, - bool &IsSubAdd) { + unsigned &NumExtracts, bool &IsSubAdd) { MVT VT = BV->getSimpleValueType(0); if (!Subtarget.hasSSE3() || !VT.isFloatingPoint()) @@ -8380,8 +8406,8 @@ // Ensure we have found an opcode for both parities and that they are // different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the // inputs are undef. - if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || - InVec0.isUndef() || InVec1.isUndef()) + if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || InVec0.isUndef() || + InVec1.isUndef()) return false; IsSubAdd = Opc[0] == ISD::FADD; @@ -8394,7 +8420,8 @@ /// Returns true if is possible to fold MUL and an idiom that has already been /// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into /// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the -/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2. 
+/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, +/// \p Opnd2. /// /// Prior to calling this function it should be known that there is some /// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation @@ -8418,8 +8445,8 @@ /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit /// FMADDSUB is. static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, - SelectionDAG &DAG, - SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, + SelectionDAG &DAG, SDValue &Opnd0, + SDValue &Opnd1, SDValue &Opnd2, unsigned ExpectedUses) { if (Opnd0.getOpcode() != ISD::FMUL || !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) @@ -8511,11 +8538,20 @@ if (HOpcode == ISD::DELETED_NODE) { GenericOpcode = Op.getOpcode(); switch (GenericOpcode) { - case ISD::ADD: HOpcode = X86ISD::HADD; break; - case ISD::SUB: HOpcode = X86ISD::HSUB; break; - case ISD::FADD: HOpcode = X86ISD::FHADD; break; - case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; - default: return false; + case ISD::ADD: + HOpcode = X86ISD::HADD; + break; + case ISD::SUB: + HOpcode = X86ISD::HSUB; + break; + case ISD::FADD: + HOpcode = X86ISD::FHADD; + break; + case ISD::FSUB: + HOpcode = X86ISD::FHSUB; + break; + default: + return false; } } @@ -8545,8 +8581,7 @@ // op (extract_vector_elt A, I), (extract_vector_elt A, I+1) unsigned ExtIndex0 = Op0.getConstantOperandVal(1); unsigned ExtIndex1 = Op1.getConstantOperandVal(1); - unsigned ExpectedIndex = i * NumEltsIn128Bits + - (j % NumEltsIn64Bits) * 2; + unsigned ExpectedIndex = i * NumEltsIn128Bits + (j % NumEltsIn64Bits) * 2; if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1) continue; @@ -9132,8 +9167,8 @@ return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); } -SDValue -X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -9159,7 +9194,7 @@ unsigned EVTBits = EltVT.getSizeInBits(); - unsigned NumZero = 0; + unsigned NumZero = 0; unsigned NumNonZero = 0; uint64_t NonZeros = 0; bool IsAllConstants = true; @@ -9275,7 +9310,7 @@ if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.getSizeInBits() >= 256) { - MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); + MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); if (Subtarget.hasAVX()) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -9296,14 +9331,13 @@ } // Is it a vector logical left shift? - if (NumElems == 2 && Idx == 1 && - X86::isZeroNode(Op.getOperand(0)) && + if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) && !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); - return getVShift(true, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - VT, Op.getOperand(1)), - NumBits/2, DAG, *this, dl); + return getVShift( + true, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)), + NumBits / 2, DAG, *this, dl); } if (IsAllConstants) // Otherwise, it's better to do a constpool load. @@ -9316,7 +9350,8 @@ // place. 
if (EVTBits == 32) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG); + return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, + DAG); } } @@ -9341,7 +9376,7 @@ return SDValue(); if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget)) - return V; + return V; // See if we can use a vector load to get all of the elements. { @@ -9355,8 +9390,8 @@ // build_vector and broadcast it. // TODO: We could probably generalize this more. if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { - SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), - DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT), + DAG.getUNDEF(EltVT)}; auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { // Make sure all the even/odd operands match. for (unsigned i = 2; i != NumElems; ++i) @@ -9371,22 +9406,22 @@ SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), DAG.getBuildVector(NarrowVT, dl, Ops)); // Broadcast from v2i64/v2f64 and cast to final VT. - MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, - NewBV)); + MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, NewBV)); } } // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. if (VT.getSizeInBits() > 128) { - MVT HVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2); // Build both the lower and upper subvector. SDValue Lower = DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2)); SDValue Upper = DAG.getBuildVector( - HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); + HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2)); // Recreate the wider vector with the lower and upper part. return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, @@ -9398,8 +9433,8 @@ if (NumNonZero == 1) { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); - SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, - Op.getOperand(Idx)); + SDValue V2 = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); @@ -9433,31 +9468,29 @@ } for (unsigned i = 0; i < 2; ++i) { - switch ((NonZeros >> (i*2)) & 0x3) { - default: llvm_unreachable("Unexpected NonZero count"); - case 0: - Ops[i] = Ops[i*2]; // Must be a zero vector. - break; - case 1: - Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]); - break; - case 2: - Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); - break; - case 3: - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); - break; + switch ((NonZeros >> (i * 2)) & 0x3) { + default: + llvm_unreachable("Unexpected NonZero count"); + case 0: + Ops[i] = Ops[i * 2]; // Must be a zero vector. + break; + case 1: + Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2 + 1], Ops[i * 2]); + break; + case 2: + Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]); + break; + case 3: + Ops[i] = getUnpackl(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]); + break; } } bool Reverse1 = (NonZeros & 0x3) == 2; bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; - int MaskVec[] = { - Reverse1 ? 1 : 0, - Reverse1 ? 0 : 1, - static_cast(Reverse2 ? NumElems+1 : NumElems), - static_cast(Reverse2 ? 
NumElems : NumElems+1) - }; + int MaskVec[] = {Reverse1 ? 1 : 0, Reverse1 ? 0 : 1, + static_cast(Reverse2 ? NumElems + 1 : NumElems), + static_cast(Reverse2 ? NumElems : NumElems + 1)}; return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec); } @@ -9476,7 +9509,8 @@ Result = DAG.getUNDEF(VT); for (unsigned i = 1; i < NumElems; ++i) { - if (Op.getOperand(i).isUndef()) continue; + if (Op.getOperand(i).isUndef()) + continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } @@ -9501,14 +9535,14 @@ for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { // Generate scaled UNPCKL shuffle mask. SmallVector Mask; - for(unsigned i = 0; i != Scale; ++i) + for (unsigned i = 0; i != Scale; ++i) Mask.push_back(i); for (unsigned i = 0; i != Scale; ++i) - Mask.push_back(NumElems+i); + Mask.push_back(NumElems + i); Mask.append(NumElems - Mask.size(), SM_SentinelUndef); for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) - Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2 * i], Ops[(2 * i) + 1], Mask); } return Ops[0]; } @@ -9521,8 +9555,8 @@ SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); - assert((ResVT.is256BitVector() || - ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); + assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && + "Value type must be 256-/512-bit wide"); unsigned NumOperands = Op.getNumOperands(); unsigned NumZero = 0; @@ -9544,18 +9578,18 @@ // If we have more than 2 non-zeros, build each half separately. if (NumNonZero > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + ResVT.getVectorNumElements() / 2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(0, NumOperands/2)); + Ops.slice(0, NumOperands / 2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(NumOperands/2)); + Ops.slice(NumOperands / 2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } // Otherwise, build it up through insert_subvectors. - SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) - : DAG.getUNDEF(ResVT); + SDValue Vec = + NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) : DAG.getUNDEF(ResVT); MVT SubVT = Op.getOperand(0).getSimpleValueType(); unsigned NumSubElems = SubVT.getVectorNumElements(); @@ -9563,8 +9597,7 @@ if ((NonZeros & (1 << i)) == 0) continue; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, - Op.getOperand(i), + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(i), DAG.getIntPtrConstant(i * NumSubElems, dl)); } @@ -9628,7 +9661,7 @@ // TODO: Merge this with LowerAVXCONCAT_VECTORS? static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG & DAG) { + SelectionDAG &DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); unsigned NumOperands = Op.getNumOperands(); @@ -9659,7 +9692,6 @@ } } - // If there are zero or one non-zeros we can handle this very simply. if (NumNonZero <= 1) { SDValue Vec = NumZero ? 
getZeroVector(ResVT, Subtarget, DAG, dl) @@ -9675,12 +9707,12 @@ if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + ResVT.getVectorNumElements() / 2); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(0, NumOperands/2)); + Ops.slice(0, NumOperands / 2)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, - Ops.slice(NumOperands/2)); + Ops.slice(NumOperands / 2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } @@ -9689,24 +9721,23 @@ if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK - SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, - DAG.getUNDEF(ResVT), Op.getOperand(0), - DAG.getIntPtrConstant(0, dl)); + SDValue Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, DAG.getUNDEF(ResVT), + Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); unsigned NumElems = ResVT.getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), - DAG.getIntPtrConstant(NumElems/2, dl)); + DAG.getIntPtrConstant(NumElems / 2, dl)); } -static SDValue LowerCONCAT_VECTORS(SDValue Op, - const X86Subtarget &Subtarget, +static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() == MVT::i1) return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); assert((VT.is256BitVector() && Op.getNumOperands() == 2) || - (VT.is512BitVector() && (Op.getNumOperands() == 2 || - Op.getNumOperands() == 4))); + (VT.is512BitVector() && + (Op.getNumOperands() == 2 || Op.getNumOperands() == 4))); // AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -9783,8 +9814,8 @@ // Ok, handle the in-lane shuffles by detecting if and when they repeat. // Adjust second vector indices to start at LaneSize instead of Size. - int LocalM = Mask[i] < Size ? Mask[i] % LaneSize - : Mask[i] % LaneSize + LaneSize; + int LocalM = + Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize; if (RepeatedMask[i % LaneSize] < 0) // This is the first non-undef entry in this slot of a 128-bit lane. RepeatedMask[i % LaneSize] = LocalM; @@ -9802,8 +9833,7 @@ return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); } -static bool -is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask) { +static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask) { SmallVector RepeatedMask; return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); } @@ -9895,7 +9925,8 @@ /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// -/// SM_SentinelZero is accepted as a valid negative index but must match in both. +/// SM_SentinelZero is accepted as a valid negative index but must match in +/// both. static bool isTargetShuffleEquivalent(ArrayRef Mask, ArrayRef ExpectedMask) { int Size = Mask.size(); @@ -10040,8 +10071,8 @@ /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. 
-static APInt computeZeroableShuffleElements(ArrayRef Mask, - SDValue V1, SDValue V2) { +static APInt computeZeroableShuffleElements(ArrayRef Mask, SDValue V1, + SDValue V2) { APInt Zeroable(Mask.size(), 0); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); @@ -10117,8 +10148,8 @@ // // The function looks for a sub-mask that the nonzero elements are in // increasing order. If such sub-mask exist. The function returns true. -static bool isNonZeroElementsInOrder(const APInt &Zeroable, - ArrayRef Mask, const EVT &VectorType, +static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef Mask, + const EVT &VectorType, bool &IsZeroSideLeft) { int NextElement = -1; // Check if the Mask's nonzero elements are in increasing order. @@ -10202,9 +10233,8 @@ // X86 has dedicated shuffle that can be lowered to VEXPAND static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT, - const APInt &Zeroable, - ArrayRef Mask, SDValue &V1, - SDValue &V2, SelectionDAG &DAG, + const APInt &Zeroable, ArrayRef Mask, + SDValue &V1, SDValue &V2, SelectionDAG &DAG, const X86Subtarget &Subtarget) { bool IsLeftZeroSide = true; if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(), @@ -10337,7 +10367,7 @@ } static bool matchVectorShuffleAsVPMOV(ArrayRef Mask, bool SwappedOps, - int Delta) { + int Delta) { int Size = (int)Mask.size(); int Split = Size / Delta; int TruncatedVectorStart = SwappedOps ? Size : 0; @@ -10753,9 +10783,9 @@ for (int i = 0, Size = Mask.size(); i < Size; ++i) for (int j = 0; j < Scale; ++j) VSELECTMask.push_back( - Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) - : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, - MVT::i8)); + Mask[i] < 0 + ? DAG.getUNDEF(MVT::i8) + : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); @@ -10897,9 +10927,11 @@ /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then /// permuting the elements of the result in place. -static SDValue lowerShuffleAsByteRotateAndPermute( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || (VT.is256BitVector() && !Subtarget.hasAVX2()) || (VT.is512BitVector() && !Subtarget.hasBWI())) @@ -11012,18 +11044,18 @@ // pre-shuffle first is a better strategy. if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) { // Only prefer immediate blends to unpack/rotate. - if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, - DAG, true)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG, true)) return BlendPerm; - if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, - DAG)) + if (SDValue UnpackPerm = + lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG)) return UnpackPerm; if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute( DL, VT, V1, V2, Mask, Subtarget, DAG)) return RotatePerm; // Unpack/rotate failed - try again with variable blends. 
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, - DAG)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG)) return BlendPerm; } @@ -11049,7 +11081,7 @@ SDValue Lo, Hi; for (int i = 0; i < NumElts; ++i) { int M = Mask[i]; - assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) && + assert((M == SM_SentinelUndef || (0 <= M && M < (2 * NumElts))) && "Unexpected mask index."); if (M < 0) continue; @@ -11171,8 +11203,7 @@ "Rotate-based lowering only supports 128-bit lowering!"); assert(Mask.size() <= 16 && "Can shuffle at most 16 bytes in a 128-bit vector!"); - assert(ByteVT == MVT::v16i8 && - "SSE2 rotate lowering only needed for v16i8!"); + assert(ByteVT == MVT::v16i8 && "SSE2 rotate lowering only needed for v16i8!"); // Default SSE2 implementation int LoByteShift = 16 - ByteRotation; @@ -11204,8 +11235,9 @@ "Only 32-bit and 64-bit elements are supported!"); // 128/256-bit vectors are only supported with VLX. - assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) - && "VLX required for 128/256-bit vectors"); + assert( + (Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) && + "VLX required for 128/256-bit vectors"); SDValue Lo = V1, Hi = V2; int Rotation = matchShuffleAsRotate(Lo, Hi, Mask); @@ -11592,7 +11624,7 @@ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); - InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG); + InputV = getExtendInVec(/*Signed*/ false, DL, ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } @@ -11708,8 +11740,7 @@ /// are both incredibly common and often quite performance sensitive. static SDValue lowerShuffleAsZeroOrAnyExtend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Bits = VT.getSizeInBits(); int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); @@ -11834,7 +11865,8 @@ // If the bitcasts shift the element size, we can't extract an equivalent // element from it. MVT NewVT = V.getSimpleValueType(); - if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) + if (!NewVT.isVector() || + NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); if (V.getOpcode() == ISD::BUILD_VECTOR || @@ -11864,8 +11896,7 @@ /// across all subtarget feature sets. static SDValue lowerShuffleAsElementInsertion( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); @@ -11884,8 +11915,8 @@ // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. - SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), - DAG); + SDValue V2S = + getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG); if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. 
V2S = DAG.getBitcast(EltVT, V2S); @@ -12077,8 +12108,8 @@ NewMask.append(NumElts, -1); // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0 - SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), - NewMask); + SDValue Shuf = + DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), NewMask); // This is free: ymm -> xmm. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf, DAG.getIntPtrConstant(0, DL)); @@ -12282,8 +12313,8 @@ // elements are zeroable. static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, unsigned &InsertPSMask, - const APInt &Zeroable, - ArrayRef Mask, SelectionDAG &DAG) { + const APInt &Zeroable, ArrayRef Mask, + SelectionDAG &DAG) { assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); @@ -12393,13 +12424,14 @@ /// because for floating point vectors we have a generalized SHUFPS lowering /// strategy that handles everything that doesn't *exactly* match an unpack, /// making this clever lowering unnecessary. -static SDValue lowerShuffleAsPermuteAndUnpack( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "This routine only supports integer vectors."); - assert(VT.is128BitVector() && - "This routine only works on 128-bit vectors."); + assert(VT.is128BitVector() && "This routine only works on 128-bit vectors."); assert(!V2.isUndef() && "This routine should only be used when blending two inputs."); assert(Mask.size() >= 2 && "Single element masks are invalid."); @@ -12447,7 +12479,8 @@ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); // Cast the inputs to the type we will use to unpack them. - MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale); + MVT UnpackVT = + MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale); V1 = DAG.getBitcast(UnpackVT, V1); V2 = DAG.getBitcast(UnpackVT, V2); @@ -12493,8 +12526,9 @@ 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1); } return DAG.getVectorShuffle( - VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, - DL, VT, V1, V2), + VT, DL, + DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT, + V1, V2), DAG.getUNDEF(VT), PermMask); } @@ -12833,8 +12867,8 @@ // when the V2 input is targeting element 0 of the mask -- that is the fast // case here. if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; if (Subtarget.hasSSE41()) { @@ -12847,8 +12881,8 @@ return V; if (!isSingleSHUFPSMask(Mask)) - if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, - V2, Mask, DAG)) + if (SDValue BlendPerm = + lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, V2, Mask, DAG)) return BlendPerm; } @@ -12923,8 +12957,8 @@ // There are special ways we can lower some single-element blends. 
if (NumV2Elements == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the @@ -13056,7 +13090,7 @@ }; if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) { - int PSHUFDMask[4] = { -1, -1, -1, -1 }; + int PSHUFDMask[4] = {-1, -1, -1, -1}; SmallVector, 4> DWordPairs; int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2); @@ -13152,7 +13186,8 @@ int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); int TripleNonInputIdx = - TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); + TripleInputSum - + std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); TripleDWord = TripleNonInputIdx / 2; // We use xor with one to compute the adjacent DWord to whichever one the @@ -13232,9 +13267,9 @@ // Adjust the mask to match the new locations of A and B. for (int &M : Mask) - if (M >= 0 && M/2 == ADWord) + if (M >= 0 && M / 2 == ADWord) M = 2 * BDWord + M % 2; - else if (M >= 0 && M/2 == BDWord) + else if (M >= 0 && M / 2 == BDWord) M = 2 * ADWord + M % 2; // Recurse back into this routine to re-compute state now that this isn't @@ -13258,10 +13293,11 @@ // First fix the masks for all the inputs that are staying in their // original halves. This will then dictate the targets of the cross-half // shuffles. - auto fixInPlaceInputs = - [&PSHUFDMask](ArrayRef InPlaceInputs, ArrayRef IncomingInputs, - MutableArrayRef SourceHalfMask, - MutableArrayRef HalfMask, int HalfOffset) { + auto fixInPlaceInputs = [&PSHUFDMask](ArrayRef InPlaceInputs, + ArrayRef IncomingInputs, + MutableArrayRef SourceHalfMask, + MutableArrayRef HalfMask, + int HalfOffset) { if (InPlaceInputs.empty()) return; if (InPlaceInputs.size() == 1) { @@ -13297,10 +13333,12 @@ // FIXME: This operation could almost certainly be simplified dramatically to // look more like the 3-1 fixing operation. auto moveInputsToRightHalf = [&PSHUFDMask]( - MutableArrayRef IncomingInputs, ArrayRef ExistingInputs, - MutableArrayRef SourceHalfMask, MutableArrayRef HalfMask, - MutableArrayRef FinalSourceHalfMask, int SourceOffset, - int DestOffset) { + MutableArrayRef IncomingInputs, + ArrayRef ExistingInputs, + MutableArrayRef SourceHalfMask, + MutableArrayRef HalfMask, + MutableArrayRef FinalSourceHalfMask, + int SourceOffset, int DestOffset) { auto isWordClobbered = [](ArrayRef SourceHalfMask, int Word) { return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word; }; @@ -13497,9 +13535,11 @@ /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the /// blend if only one input is used. -static SDValue lowerShuffleAsBlendOfPSHUFBs( - const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { +static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG, bool &V1InUse, + bool &V2InUse) { assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && "Lane crossing shuffle masks not supported"); @@ -13592,8 +13632,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. 
- if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. @@ -13618,14 +13658,14 @@ // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = + lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG)) return V; // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the @@ -13645,8 +13685,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use byte rotation instructions. @@ -13672,14 +13712,14 @@ // can both shuffle and set up the inefficient blend. if (!IsBlendSupported && Subtarget.hasSSSE3()) { bool V1InUse, V2InUse; - return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG, V1InUse, V2InUse); + return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, Zeroable, + DAG, V1InUse, V2InUse); } // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. - return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, Mask, + Subtarget, DAG); } /// Check whether a compaction lowering can be done by dropping even @@ -13749,8 +13789,8 @@ } static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { + ArrayRef Mask, SDValue V1, SDValue V2, + SelectionDAG &DAG) { MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); @@ -13787,8 +13827,8 @@ return Rotate; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use a zext lowering. @@ -13798,8 +13838,8 @@ // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = + lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG)) return V; int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); @@ -13851,7 +13891,7 @@ int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; SmallDenseMap LaneMap; for (int I : InPlaceInputs) { - PreDupI16Shuffle[I/2] = I/2; + PreDupI16Shuffle[I / 2] = I / 2; LaneMap[I] = I; } int j = TargetLo ? 0 : 4, je = j + 4; @@ -13865,7 +13905,8 @@ ++j; if (j == je) - // We can't place the inputs into a single half with a simple i16 shuffle, so bail. + // We can't place the inputs into a single half with a simple i16 + // shuffle, so bail. return SDValue(); // Map this input with the i16 shuffle. 
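For context on the lowerShuffleAsBlendOfPSHUFBs signature reformatted in one of the hunks above: the idea is to shuffle each input with PSHUFB using a per-byte mask in which bytes belonging to the other input are set to 0x80 (PSHUFB zeroes any byte whose mask has bit 7 set), then OR the two results. A hedged stand-alone model of the 16-byte case follows; the helper names are illustrative, not from the patch, and undef mask entries are simplified to zero bytes:

#include <array>
#include <cstdint>

using Bytes = std::array<uint8_t, 16>;

// Software model of PSHUFB: a mask byte with bit 7 set produces 0, otherwise
// its low 4 bits index into V.
static Bytes pshufb(const Bytes &V, const Bytes &M) {
  Bytes R{};
  for (int i = 0; i < 16; ++i)
    R[i] = (M[i] & 0x80) ? 0 : V[M[i] & 0x0F];
  return R;
}

static Bytes blendOfPshufbs(const Bytes &V1, const Bytes &V2,
                            const std::array<int, 16> &Mask) {
  Bytes M1, M2, Out{};
  for (int i = 0; i < 16; ++i) {
    int M = Mask[i];
    M1[i] = (M >= 0 && M < 16) ? (uint8_t)M : 0x80;  // bytes taken from V1
    M2[i] = (M >= 16) ? (uint8_t)(M - 16) : 0x80;    // bytes taken from V2
  }
  Bytes R1 = pshufb(V1, M1), R2 = pshufb(V2, M2);
  for (int i = 0; i < 16; ++i)
    Out[i] = R1[i] | R2[i];                          // blend by OR-ing
  return Out;
}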
@@ -13964,8 +14005,8 @@ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the // PALIGNR will be cheaper than the second PSHUFB+OR. - if (SDValue V = lowerShuffleAsByteRotateAndPermute( - DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v16i8, V1, V2, + Mask, Subtarget, DAG)) return V; } @@ -13974,8 +14015,8 @@ // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) - if (SDValue V = lowerShuffleAsElementInsertion( - DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) @@ -13999,7 +14040,7 @@ "No support for dropping even elements more than 3 times."); // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. - MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; + MVT MaskVTs[] = {MVT::v8i16, MVT::v4i32, MVT::v2i64}; SDValue ByteClearMask = DAG.getBitcast( MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); @@ -14056,8 +14097,8 @@ if (M >= 0) M /= 2; } else { - // Otherwise just unpack the low half of V into VLoHalf and the high half into - // VHiHalf so that we can blend them as i16s. + // Otherwise just unpack the low half of V into VLoHalf and the high half + // into VHiHalf so that we can blend them as i16s. SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL); VLoHalf = DAG.getBitcast( @@ -14066,8 +14107,10 @@ MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } - SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); - SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); + SDValue LoV = + DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); + SDValue HiV = + DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); } @@ -14076,9 +14119,8 @@ /// /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. -static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { @@ -14197,8 +14239,7 @@ SDValue V1Blend, V2Blend; if (UseLoV1 && UseHiV1) { - V1Blend = - DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); + V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); } else { // We only use half of V1 so map the usage down into the final blend mask. V1Blend = UseLoV1 ? LoV1 : HiV1; @@ -14207,8 +14248,7 @@ BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); } if (UseLoV2 && UseHiV2) { - V2Blend = - DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); + V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); } else { // We only use half of V2 so map the usage down into the final blend mask. V2Blend = UseLoV2 ? 
LoV2 : HiV2; @@ -14236,7 +14276,7 @@ const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This routine must not be used to lower single-input " - "shuffles as it could then recurse on itself."); + "shuffles as it could then recurse on itself."); int Size = Mask.size(); // If this can be modeled as a broadcast of two elements followed by a blend, @@ -14259,8 +14299,8 @@ return true; }; if (DoBothBroadcast()) - return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, + DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to @@ -14387,16 +14427,17 @@ SmallVector FlippedBlendMask(Size); for (int i = 0; i < Size; ++i) FlippedBlendMask[i] = - Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) - ? Mask[i] - : Mask[i] % LaneSize + - (i / LaneSize) * LaneSize + Size); + Mask[i] < 0 + ? -1 + : (((Mask[i] % Size) / LaneSize == i / LaneSize) + ? Mask[i] + : Mask[i] % LaneSize + (i / LaneSize) * LaneSize + Size); // Flip the vector, and blend the results which should now be in-lane. MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; SDValue Flipped = DAG.getBitcast(PVT, V1); - Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), - { 2, 3, 0, 1 }); + Flipped = + DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1}); Flipped = DAG.getBitcast(VT, Flipped); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } @@ -14433,8 +14474,8 @@ // instruction bytes needed to explicitly generate the zero vector. // Blends are faster and handle all the non-lane-crossing cases. - if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, - Subtarget, DAG)) + if (SDValue Blend = + lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // If either input operand is a zero vector, use VPERM2X128 because its mask @@ -14449,9 +14490,9 @@ // this will likely become vinsertf128 which can't fold a 256-bit memop. if (!isa(peekThroughBitcasts(V1))) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); - SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); + SDValue SubVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(2, DL)); } @@ -14460,10 +14501,10 @@ // Try to use SHUF128 if possible. if (Subtarget.hasVLX()) { if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { - unsigned PermMask = ((WidenedMask[0] % 2) << 0) | - ((WidenedMask[1] % 2) << 1); - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + unsigned PermMask = + ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getConstant(PermMask, DL, MVT::i8)); } } } @@ -14485,7 +14526,7 @@ (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?"); unsigned PermMask = 0; - PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); + PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); // Check the immediate mask and replace unused sources with undef. @@ -14522,7 +14563,7 @@ // First pass will try to fill in the RepeatMask from lanes that need two // sources. 
for (int Lane = 0; Lane != NumLanes; ++Lane) { - int Srcs[2] = { -1, -1 }; + int Srcs[2] = {-1, -1}; SmallVector InLaneMask(LaneSize, -1); for (int i = 0; i != LaneSize; ++i) { int M = Mask[(Lane * LaneSize) + i]; @@ -14673,9 +14714,9 @@ /// adjusted to access the extracted halves of the original shuffle operands is /// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or /// lower half of each input operand is accessed. -static bool -getHalfShuffleMask(ArrayRef Mask, MutableArrayRef HalfMask, - int &HalfIdx1, int &HalfIdx2) { +static bool getHalfShuffleMask(ArrayRef Mask, + MutableArrayRef HalfMask, int &HalfIdx1, + int &HalfIdx2) { assert((Mask.size() == HalfMask.size() * 2) && "Expected input mask to be twice as long as output"); @@ -15090,7 +15131,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { - assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& + assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; @@ -15204,8 +15245,8 @@ Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 4-lane 64-bit integer shuffles. @@ -15230,8 +15271,8 @@ return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (V2.isUndef()) { @@ -15261,8 +15302,8 @@ // If we have VLX support, we can use VALIGN or VEXPAND. if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, @@ -15323,8 +15364,8 @@ return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane, we have many more @@ -15370,8 +15411,8 @@ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, DAG, + Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -15401,8 +15442,8 @@ Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 8-lane 32-bit integer shuffles. @@ -15439,8 +15480,8 @@ return Blend; // Check for being able to broadcast a single element. 
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane we can use more @@ -15467,8 +15508,8 @@ // If we have VLX support, we can use VALIGN or EXPAND. if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, @@ -15550,8 +15591,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. @@ -15587,8 +15628,8 @@ // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v16 case. - return lowerV8I16GeneralSingleInputShuffle( - DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v16i16, V1, + RepeatedMask, Subtarget, DAG); } } @@ -15607,13 +15648,13 @@ return Result; // Try to permute the lanes and then use a per-lane permute. - if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v16i16, V1, V2, + Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, Subtarget, + DAG); } /// Handle lowering of 32-lane 8-bit integer shuffles. @@ -15637,8 +15678,8 @@ return ZExt; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, @@ -15650,13 +15691,13 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -15673,8 +15714,8 @@ // There are no generalized cross-lane shuffle operations available on i8 // element types. if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) { - if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2, + Mask, DAG, Subtarget)) return V; return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, @@ -15696,13 +15737,13 @@ return Result; // Try to permute the lanes and then use a per-lane permute. 
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute( - DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2, + Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, + DAG); } /// High-level routine to lower various 256-bit x86 vector shuffles. @@ -15805,14 +15846,13 @@ // Check for patterns which can be matched with a single insert of a 256-bit // subvector. - bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, - {0, 1, 2, 3, 0, 1, 2, 3}); - if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, - {0, 1, 2, 3, 8, 9, 10, 11})) { + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 0, 1, 2, 3}); + if (OnlyUsesV1 || + isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); - SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); + SDValue SubVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, DAG.getIntPtrConstant(4, DL)); } @@ -15966,15 +16006,14 @@ // If we have a single input shuffle with different shuffle patterns in the // 128-bit lanes and don't lane cross, use variable mask VPERMILPS. - if (V2.isUndef() && - !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { + if (V2.isUndef() && !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true); return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask); } // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); @@ -16020,8 +16059,8 @@ return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use PALIGNR. @@ -16082,8 +16121,8 @@ return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Rotate = + lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use byte rotation instructions. @@ -16149,13 +16188,13 @@ // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v32 case. - return lowerV8I16GeneralSingleInputShuffle( - DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1, + RepeatedMask, Subtarget, DAG); } } if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, @@ -16178,8 +16217,8 @@ // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. 
It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( - DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. @@ -16187,8 +16226,8 @@ return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, Subtarget)) return V; // Try to use shift instructions. @@ -16235,13 +16274,11 @@ /// This routine either breaks down the specific type of a 512-bit x86 vector /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. -static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget.hasAVX512() && - "Cannot lower 512-bit vectors w/ basic ISA!"); + assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. @@ -16259,8 +16296,8 @@ return V; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; // Dispatch to each element type for lowering. If we don't have support for @@ -16318,14 +16355,12 @@ return -1; } - // Lower vXi1 vector shuffles. // There is no a dedicated instruction on AVX-512 that shuffles the masks. // The only way to shuffle bits is to sign-extend the mask vector to SIMD // vector, shuffle and then truncate it back. -static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, +static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && @@ -16350,18 +16385,18 @@ // not covered by the subvector. if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, - V1, DAG.getIntPtrConstant(0, DL)); + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), - Extract, DAG.getIntPtrConstant(0, DL)); + getZeroVector(VT, Subtarget, DAG, DL), Extract, + DAG.getIntPtrConstant(0, DL)); } // Try to match KSHIFTs. // TODO: Support narrower than legal shifts by widening and extracting. 
if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) { unsigned Offset = 0; - for (SDValue V : { V1, V2 }) { + for (SDValue V : {V1, V2}) { unsigned Opcode; int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); if (ShiftAmt >= 0) @@ -16371,7 +16406,6 @@ } } - MVT ExtVT; switch (VT.SimpleTy) { default: @@ -16411,8 +16445,8 @@ int NumElems = VT.getVectorNumElements(); if ((Subtarget.hasBWI() && (NumElems >= 32)) || (Subtarget.hasDQI() && (NumElems < 32))) - return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), - Shuffle, ISD::SETGT); + return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), Shuffle, + ISD::SETGT); return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); } @@ -16526,7 +16560,8 @@ } // Check for illegal shuffle mask element index values. - int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit; + int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); + (void)MaskUpperLimit; assert(llvm::all_of(Mask, [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && "Out of bounds shuffle index"); @@ -16561,8 +16596,8 @@ // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, - Subtarget, DAG)) + if (SDValue Broadcast = + lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Broadcast; MVT NewEltVT = VT.isFloatingPoint() @@ -16677,8 +16712,7 @@ // Build a mask by testing the condition against zero. MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond, - DAG.getConstant(0, dl, CondVT), - ISD::SETNE); + DAG.getConstant(0, dl, CondVT), ISD::SETNE); // Now return a new VSELECT using the mask. return DAG.getSelect(dl, VT, Mask, LHS, RHS); } @@ -16745,8 +16779,7 @@ if (!Op.hasOneUse()) return SDValue(); SDNode *User = *Op.getNode()->use_begin(); - if ((User->getOpcode() != ISD::STORE || - isNullConstant(Op.getOperand(1))) && + if ((User->getOpcode() != ISD::STORE || isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); @@ -16800,9 +16833,9 @@ MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, - DAG.getUNDEF(WideVecVT), Vec, - DAG.getIntPtrConstant(0, dl)); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Use kshiftr instruction to move to the lower element. 
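The lower1BitShuffle hunks above follow the strategy stated in that function's header comment: AVX-512 has no instruction that shuffles mask bits directly, so the mask is sign-extended to an ordinary SIMD vector, shuffled, and truncated back (here via a compare against zero). A rough scalar model of that round trip, assuming an 8-bit mask; everything below is illustrative and not part of the patch:

#include <array>
#include <cstdint>

static uint8_t shuffleV8i1(uint8_t Mask, const std::array<int, 8> &Idx) {
  std::array<int32_t, 8> Wide{};             // "sign-extend" each bit to a lane
  for (int i = 0; i < 8; ++i)
    Wide[i] = (Mask >> i) & 1 ? -1 : 0;
  std::array<int32_t, 8> Shuf{};             // ordinary element shuffle
  for (int i = 0; i < 8; ++i)
    Shuf[i] = Idx[i] < 0 ? 0 : Wide[Idx[i]]; // undef lanes become zero here
  uint8_t Out = 0;                           // "truncate" back to a bit mask
  for (int i = 0; i < 8; ++i)
    Out |= (Shuf[i] != 0) << i;
  return Out;
}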
@@ -16813,9 +16846,8 @@ DAG.getIntPtrConstant(0, dl)); } -SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); @@ -16850,10 +16882,10 @@ // | | Ports pressure in cycles | | // |Uops| 1 | 2 - D |3 - D | 4 | 5 | | // --------------------------------------------------------- - // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0 - // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] - // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] - // Total Num Of Uops: 4 + // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], + // xmm0 |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] |1 + // | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] Total Num + // Of Uops: 4 return SDValue(); } @@ -16933,7 +16965,7 @@ return Op; // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { static_cast(IdxVal), -1, -1, -1 }; + int Mask[4] = {static_cast(IdxVal), -1, -1, -1}; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); @@ -16949,7 +16981,7 @@ // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - int Mask[2] = { 1, -1 }; + int Mask[2] = {1, -1}; Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); @@ -16974,9 +17006,10 @@ unsigned NumElts = VecVT.getVectorNumElements(); MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); - SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, - DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), - DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); + SDValue ExtOp = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); } @@ -17188,9 +17221,9 @@ MVT WideVecVT = VecVT; if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, - DAG.getUNDEF(WideVecVT), Vec, - DAG.getIntPtrConstant(0, dl)); + Vec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } // Shift to the LSB. @@ -17202,8 +17235,9 @@ } // Returns the appropriate wrapper opcode for a global reference. -unsigned X86TargetLowering::getGlobalWrapperKind( - const GlobalValue *GV, const unsigned char OpFlags) const { +unsigned +X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV, + const unsigned char OpFlags) const { // References to absolute symbols are never PC-relative. if (GV && GV->isAbsoluteSymbolRef()) return X86ISD::Wrapper; @@ -17226,8 +17260,8 @@ // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. 
-SDValue -X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -17275,11 +17309,10 @@ return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -SDValue -X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. - unsigned char OpFlags = - Subtarget.classifyBlockAddressReference(); + unsigned char OpFlags = Subtarget.classifyBlockAddressReference(); const BlockAddress *BA = cast(Op)->getBlockAddress(); int64_t Offset = cast(Op)->getOffset(); SDLoc dl(Op); @@ -17369,31 +17402,30 @@ return Result; } -SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue -GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { +static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue *InFlag, + const EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags, + bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), - OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + X86ISD::NodeType CallType = + LocalDynamic ? X86ISD::TLSBASEADDR : X86ISD::TLSADDR; if (InFlag) { - SDValue Ops[] = { Chain, TGA, *InFlag }; + SDValue Ops[] = {Chain, TGA, *InFlag}; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } else { - SDValue Ops[] = { Chain, TGA }; + SDValue Ops[] = {Chain, TGA}; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } @@ -17406,36 +17438,35 @@ } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit -static SDValue -LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { +static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT) { SDValue InFlag; - SDLoc dl(GA); // ? function entry point might be better - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), PtrVT), InFlag); + SDLoc dl(GA); // ? 
function entry point might be better + SDValue Chain = DAG.getCopyToReg( + DAG.getEntryNode(), dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit -static SDValue -LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::RAX, X86II::MO_TLSGD); +static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, + X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG, - const EVT PtrVT, + SelectionDAG &DAG, const EVT PtrVT, bool is64Bit) { SDLoc dl(GA); // Get the start address of the TLS block for this module. - X86MachineFunctionInfo *MFI = DAG.getMachineFunction() - .getInfo(); + X86MachineFunctionInfo *MFI = + DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; @@ -17444,7 +17475,8 @@ X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InFlag; - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, + SDValue Chain = DAG.getCopyToReg( + DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, @@ -17457,9 +17489,8 @@ // Build x@dtpoff. unsigned char OperandFlags = X86II::MO_DTPOFF; unsigned WrapperKind = X86ISD::Wrapper; - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); // Add x@dtpoff with the base. @@ -17473,8 +17504,8 @@ SDLoc dl(GA); // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). - Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), - is64Bit ? 257 : 256)); + Value *Ptr = Constant::getNullValue( + Type::getInt8PtrTy(*DAG.getContext(), is64Bit ? 
257 : 256)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), @@ -17500,9 +17531,8 @@ // emit "addl x@ntpoff,%eax" (local exec) // or "addl x@indntpoff,%eax" (initial exec) // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic) - SDValue TGA = - DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), - GA->getOffset(), OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) { @@ -17521,8 +17551,8 @@ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } -SDValue -X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); @@ -17536,17 +17566,16 @@ if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { - case TLSModel::GeneralDynamic: - if (Subtarget.is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); - return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); - case TLSModel::LocalDynamic: - return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, - Subtarget.is64Bit()); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), - PositionIndependent); + case TLSModel::GeneralDynamic: + if (Subtarget.is64Bit()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); + case TLSModel::LocalDynamic: + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit()); + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), + PositionIndependent); } llvm_unreachable("Unknown TLS model."); } @@ -17554,8 +17583,8 @@ if (Subtarget.isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; - unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? - X86ISD::WrapperRIP : X86ISD::Wrapper; + unsigned WrapperKind = + Subtarget.isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. @@ -17565,9 +17594,8 @@ else OpFlag = X86II::MO_TLVP; SDLoc DL(Op); - SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, - GA->getValueType(0), - GA->getOffset(), OpFlag); + SDValue Result = DAG.getTargetGlobalAddress( + GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. 
@@ -17581,7 +17609,7 @@ SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); - SDValue Args[] = { Chain, Offset }; + SDValue Args[] = {Chain, Offset}; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), DAG.getIntPtrConstant(0, DL, true), @@ -17598,8 +17626,7 @@ } if (Subtarget.isTargetKnownWindowsMSVC() || - Subtarget.isTargetWindowsItanium() || - Subtarget.isTargetWindowsGNU()) { + Subtarget.isTargetWindowsItanium() || Subtarget.isTargetWindowsGNU()) { // Just use the implicit TLS architecture // Need to generate something similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -17617,11 +17644,9 @@ // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly // use its literal value of 0x2C. - Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() - ? Type::getInt8PtrTy(*DAG.getContext(), - 256) - : Type::getInt32PtrTy(*DAG.getContext(), - 257)); + Value *Ptr = Constant::getNullValue( + Subtarget.is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) + : Type::getInt32PtrTy(*DAG.getContext(), 257)); SDValue TlsArray = Subtarget.is64Bit() ? DAG.getIntPtrConstant(0x58, dl) @@ -17655,9 +17680,9 @@ res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo()); // Get the offset of start of .tls section - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), X86II::MO_SECREL); + SDValue TGA = + DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), + GA->getOffset(), X86II::MO_SECREL); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread @@ -17679,7 +17704,7 @@ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); + SDValue ShAmt = Op.getOperand(2); // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away // during isel. @@ -17704,7 +17729,7 @@ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, dl, MVT::i8)); SDValue Cond = DAG.getSetCC(dl, MVT::i8, AndNode, - DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); + DAG.getConstant(0, dl, MVT::i8), ISD::SETNE); SDValue Hi, Lo; if (Op.getOpcode() == ISD::SHL_PARTS) { @@ -17715,7 +17740,7 @@ Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3); } - return DAG.getMergeValues({ Lo, Hi }, dl); + return DAG.getMergeValues({Lo, Hi}, dl); } static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, @@ -17740,12 +17765,12 @@ APInt APIntShiftAmt; if (isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, - Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, + Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); } - return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, - Op0, Op1, Amt); + return DAG.getNode(IsFSHR ? 
X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, Op0, + Op1, Amt); } assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -17773,13 +17798,14 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert((Op.getOpcode() == ISD::SINT_TO_FP || - Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!"); + Op.getOpcode() == ISD::UINT_TO_FP) && + "Unexpected opcode!"); SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); - if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || - (VT != MVT::f32 && VT != MVT::f64)) + if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || + (VT != MVT::f32 && VT != MVT::f64)) return SDValue(); // Pack the i64 into a vector, do the operation and extract. @@ -17799,22 +17825,22 @@ static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, const X86Subtarget &Subtarget) { switch (Opcode) { - case ISD::SINT_TO_FP: - // TODO: Handle wider types with AVX/AVX512. - if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) - return false; - // CVTDQ2PS or (V)CVTDQ2PD - return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); - - case ISD::UINT_TO_FP: - // TODO: Handle wider types and i64 elements. - if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) - return false; - // VCVTUDQ2PS or VCVTUDQ2PD - return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; - - default: + case ISD::SINT_TO_FP: + // TODO: Handle wider types with AVX/AVX512. + if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) + return false; + // CVTDQ2PS or (V)CVTDQ2PD + return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); + + case ISD::UINT_TO_FP: + // TODO: Handle wider types and i64 elements. + if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) return false; + // VCVTUDQ2PS or VCVTUDQ2PD + return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; + + default: + return false; } } @@ -17893,14 +17919,13 @@ return V; SDValue ValueToStore = Op.getOperand(0); - if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && - !Subtarget.is64Bit()) + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); - unsigned Size = SrcVT.getSizeInBits()/8; + unsigned Size = SrcVT.getSizeInBits() / 8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false); @@ -17989,24 +18014,24 @@ LLVMContext *Context = DAG.getContext(); // Build some magic constants. 
- static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; + static const uint32_t CV0[] = {0x43300000, 0x45300000, 0, 0}; Constant *C0 = ConstantDataVector::get(*Context, CV0); auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); - SmallVector CV1; + SmallVector CV1; CV1.push_back( - ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), - APInt(64, 0x4330000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), + APInt(64, 0x4330000000000000ULL)))); CV1.push_back( - ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), - APInt(64, 0x4530000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), + APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. - SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, - Op.getOperand(0)); + SDValue XR1 = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), @@ -18027,7 +18052,7 @@ // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { - SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); + SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1, -1}); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub); } @@ -18040,12 +18065,12 @@ const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. - SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, - MVT::f64); + SDValue Bias = + DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // Load the 32-bit value into an XMM register. - SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, - Op.getOperand(0)); + SDValue Load = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(0)); // Zero out the upper parts of the register. Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); @@ -18102,7 +18127,7 @@ SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask); SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI); - fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); + fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO); // Add the two halves. @@ -18285,9 +18310,9 @@ MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); - SDValue Ops[] = { Store, StackSlot }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, - MVT::i64, MMO); + SDValue Ops[] = {Store, StackSlot}; + SDValue Fild = + DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -18325,9 +18350,8 @@ // Otherwise it is assumed to be a conversion from one of f32, f64 or f80 // to i16, i32 or i64, and we lower it to a legal sequence and return the // result. 
-SDValue -X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { +SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { SDLoc DL(Op); EVT DstTy = Op.getValueType(); @@ -18352,8 +18376,7 @@ DstTy = MVT::i64; } - assert(DstTy.getSimpleVT() <= MVT::i64 && - DstTy.getSimpleVT() >= MVT::i16 && + assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && "Unknown FP_TO_INT to lower!"); // We lower FP->int64 into FISTP64 followed by a load from a temporary @@ -18389,8 +18412,8 @@ bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. - Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &LosesInfo); + Status = Thresh.convert(APFloat::IEEEdouble(), + APFloat::rmNearestTiesToEven, &LosesInfo); else if (TheVT == MVT::f80) Status = Thresh.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &LosesInfo); @@ -18400,18 +18423,16 @@ SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); - SDValue Cmp = DAG.getSetCC(DL, - getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), TheVT), - Value, ThreshVal, ISD::SETLT); - Adjust = DAG.getSelect(DL, MVT::i64, Cmp, - DAG.getConstant(0, DL, MVT::i64), - DAG.getConstant(APInt::getSignMask(64), - DL, MVT::i64)); + SDValue Cmp = DAG.getSetCC( + DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Adjust = + DAG.getSelect(DL, MVT::i64, Cmp, DAG.getConstant(0, DL, MVT::i64), + DAG.getConstant(APInt::getSignMask(64), DL, MVT::i64)); SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); - Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), TheVT), - Value, ThreshVal, ISD::SETLT); + Cmp = DAG.getSetCC( + DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); } @@ -18423,7 +18444,7 @@ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI); SDVTList Tys = DAG.getVTList(TheVT, MVT::Other); - SDValue Ops[] = { Chain, StackSlot }; + SDValue Ops[] = {Chain, StackSlot}; unsigned FLDSize = TheVT.getStoreSize(); assert(FLDSize <= MemSize && "Stack slot not big enough"); @@ -18436,10 +18457,9 @@ // Build the FP_TO_INT*_IN_MEM MachineMemOperand *MMO = MF.getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemSize, MemSize); - SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL, - DAG.getVTList(MVT::Other), - Ops, DstTy, MMO); + SDValue Ops[] = {Chain, Value, StackSlot}; + SDValue FIST = DAG.getMemIntrinsicNode( + X86ISD::FP_TO_INT_IN_MEM, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI); @@ -18474,8 +18494,8 @@ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); - In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), - MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); + In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, + DAG.getUNDEF(MVT::v8i8)); // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input. 
return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In); } @@ -18530,9 +18550,8 @@ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); } -static SDValue LowerZERO_EXTEND_Mask(SDValue Op, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); @@ -18563,10 +18582,9 @@ if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); - In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), - In, DAG.getIntPtrConstant(0, DL)); - WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), - NumElts); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), In, + DAG.getIntPtrConstant(0, DL)); + WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } SDValue One = DAG.getConstant(1, DL, WideVT); @@ -18725,14 +18743,12 @@ if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { // We need to shift to get the lsb into sign position. // Shift packed bytes not supported natively, bitcast to word - MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); - In = DAG.getNode(ISD::SHL, DL, ExtVT, - DAG.getBitcast(ExtVT, In), + MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits() / 16); + In = DAG.getNode(ISD::SHL, DL, ExtVT, DAG.getBitcast(ExtVT, In), DAG.getConstant(ShiftInx, DL, ExtVT)); In = DAG.getBitcast(InVT, In); } - return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), - In, ISD::SETGT); + return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT); } // Use TESTD/Q, extended vector to packed dword/qword. assert((InVT.is256BitVector() || InVT.is128BitVector()) && @@ -18765,7 +18781,8 @@ // We either have 8 elements or we're allowed to use 512-bit vectors. // If we have VLX, we want to use the narrowest vector that can get the // job done so we use vXi32. - MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts); + MVT EltVT = + Subtarget.hasVLX() ? 
MVT::i32 : MVT::getIntegerVT(512 / NumElts); MVT ExtVT = MVT::getVectorVT(EltVT, NumElts); In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; @@ -18856,15 +18873,14 @@ In = DAG.getBitcast(MVT::v32i8, In); // The PSHUFB mask: - static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1, - 16, 17, 20, 21, 24, 25, 28, 29, - -1, -1, -1, -1, -1, -1, -1, -1 }; + static const int ShufMask1[] = { + 0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, + 16, 17, 20, 21, 24, 25, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1}; In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); In = DAG.getBitcast(MVT::v4i64, In); - static const int ShufMask2[] = {0, 2, -1, -1}; - In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); + static const int ShufMask2[] = {0, 2, -1, -1}; + In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); return DAG.getBitcast(VT, In); @@ -18880,8 +18896,8 @@ OpHi = DAG.getBitcast(MVT::v16i8, OpHi); // The PSHUFB mask: - static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; + static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1}; OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); @@ -18942,8 +18958,8 @@ TruncVT = MVT::v8i1; Opc = ISD::FP_TO_UINT; Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, - DAG.getUNDEF(MVT::v8f64), - Src, DAG.getIntPtrConstant(0, dl)); + DAG.getUNDEF(MVT::v8f64), Src, + DAG.getIntPtrConstant(0, dl)); } SDValue Res = DAG.getNode(Opc, dl, ResVT, Src); Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res); @@ -18952,7 +18968,7 @@ } assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); - if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { + if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32))); @@ -19001,9 +19017,9 @@ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, - In, DAG.getUNDEF(SVT))); + return DAG.getNode( + X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT))); } /// Horizontal vector math instructions may be slower than normal math with @@ -19046,12 +19062,20 @@ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB? unsigned HOpcode; switch (Op.getOpcode()) { - case ISD::ADD: HOpcode = X86ISD::HADD; break; - case ISD::SUB: HOpcode = X86ISD::HSUB; break; - case ISD::FADD: HOpcode = X86ISD::FHADD; break; - case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; - default: - llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); + case ISD::ADD: + HOpcode = X86ISD::HADD; + break; + case ISD::SUB: + HOpcode = X86ISD::HSUB; + break; + case ISD::FADD: + HOpcode = X86ISD::FHADD; + break; + case ISD::FSUB: + HOpcode = X86ISD::FHSUB; + break; + default: + llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); } unsigned LExtIndex = LHS.getConstantOperandVal(1); unsigned RExtIndex = RHS.getConstantOperandVal(1); @@ -19136,16 +19160,15 @@ unsigned EltBits = VT.getScalarSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... - APInt MaskElt = IsFABS ? 
APInt::getSignedMaxValue(EltBits) : - APInt::getSignMask(EltBits); + APInt MaskElt = + IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); - unsigned LogicOp = IsFABS ? X86ISD::FAND : - IsFNABS ? X86ISD::FOR : - X86ISD::FXOR; + unsigned LogicOp = + IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; if (VT.isVector() || IsF128) @@ -19171,7 +19194,8 @@ // And if it is bigger, shrink it first. if (Sign.getSimpleValueType().bitsGT(VT)) - Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl)); + Sign = + DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl)); // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. @@ -19222,8 +19246,9 @@ // OR the magnitude value with the sign bit. SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit); - return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, - DAG.getIntPtrConstant(0, dl)); + return !IsFakeVector ? Or + : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -19384,14 +19409,20 @@ bool NeedCF = false; bool NeedOF = false; switch (X86CC) { - default: break; - case X86::COND_A: case X86::COND_AE: - case X86::COND_B: case X86::COND_BE: + default: + break; + case X86::COND_A: + case X86::COND_AE: + case X86::COND_B: + case X86::COND_BE: NeedCF = true; break; - case X86::COND_G: case X86::COND_GE: - case X86::COND_L: case X86::COND_LE: - case X86::COND_O: case X86::COND_NO: { + case X86::COND_G: + case X86::COND_GE: + case X86::COND_L: + case X86::COND_LE: + case X86::COND_O: + case X86::COND_NO: { // Check if we really need to set the // Overflow flag. If NoSignedWrap is present // that is not actually needed. @@ -19442,20 +19473,31 @@ // using an RMW op or only the flags are used. Otherwise, leave // the node alone and emit a 'test' instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && - UI->getOpcode() != ISD::SETCC && + UE = Op.getNode()->use_end(); + UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC && UI->getOpcode() != ISD::STORE) goto default_case; // Otherwise use a regular EFLAGS-setting instruction. 
switch (ArithOp.getOpcode()) { - default: llvm_unreachable("unexpected operator!"); - case ISD::ADD: Opcode = X86ISD::ADD; break; - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - case ISD::OR: Opcode = X86ISD::OR; break; + default: + llvm_unreachable("unexpected operator!"); + case ISD::ADD: + Opcode = X86ISD::ADD; + break; + case ISD::SUB: + Opcode = X86ISD::SUB; + break; + case ISD::XOR: + Opcode = X86ISD::XOR; + break; + case ISD::AND: + Opcode = X86ISD::AND; + break; + case ISD::OR: + Opcode = X86ISD::OR; + break; } NumOperands = 2; @@ -19496,8 +19538,9 @@ if (CmpVT.isFloatingPoint()) return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); - assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || - CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!"); + assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || CmpVT == MVT::i32 || + CmpVT == MVT::i64) && + "Unexpected VT!"); // Only promote the compare up to I32 if it is a 16 bit operation // with an immediate. 16 bit immediates are to be avoided. @@ -19544,8 +19587,7 @@ SelectionDAG &DAG) const { // If the subtarget does not support the FUCOMI instruction, floating-point // comparisons have to be converted. - if (Subtarget.hasCMov() || - Cmp.getOpcode() != X86ISD::CMP || + if (Subtarget.hasCMov() || Cmp.getOpcode() != X86ISD::CMP || !Cmp.getOperand(0).getValueType().isFloatingPoint() || !Cmp.getOperand(1).getValueType().isFloatingPoint()) return Cmp; @@ -19581,9 +19623,8 @@ /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). -SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, - SelectionDAG &DAG, int Enabled, - int &RefinementSteps, +SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const { EVT VT = Op.getValueType(); @@ -19652,15 +19693,12 @@ /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. -unsigned X86TargetLowering::combineRepeatedFPDivisors() const { - return 2; -} +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; } /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. -static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC) { +static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, + SelectionDAG &DAG, SDValue &X86CC) { assert(And.getOpcode() == ISD::AND && "Expected AND node!"); SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); @@ -19701,8 +19739,8 @@ if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) && isPowerOf2_64(AndRHSVal)) { Src = AndLHS; - BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, - Src.getValueType()); + BitNo = + DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, Src.getValueType()); } } } @@ -19732,8 +19770,8 @@ if (Src.getValueType() != BitNo.getValueType()) BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, - dl, MVT::i8); + X86CC = DAG.getConstant(CC == ISD::SETEQ ? 
X86::COND_AE : X86::COND_B, dl, + MVT::i8); return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); } @@ -19754,27 +19792,56 @@ // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: llvm_unreachable("Unexpected SETCC condition"); + default: + llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: - case ISD::SETEQ: SSECC = 0; break; + case ISD::SETEQ: + SSECC = 0; + break; case ISD::SETOGT: - case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH; + case ISD::SETGT: + Swap = true; + LLVM_FALLTHROUGH; case ISD::SETLT: - case ISD::SETOLT: SSECC = 1; break; + case ISD::SETOLT: + SSECC = 1; + break; case ISD::SETOGE: - case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH; + case ISD::SETGE: + Swap = true; + LLVM_FALLTHROUGH; case ISD::SETLE: - case ISD::SETOLE: SSECC = 2; break; - case ISD::SETUO: SSECC = 3; break; + case ISD::SETOLE: + SSECC = 2; + break; + case ISD::SETUO: + SSECC = 3; + break; case ISD::SETUNE: - case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGT: SSECC = 6; break; - case ISD::SETO: SSECC = 7; break; - case ISD::SETUEQ: SSECC = 8; break; - case ISD::SETONE: SSECC = 12; break; + case ISD::SETNE: + SSECC = 4; + break; + case ISD::SETULE: + Swap = true; + LLVM_FALLTHROUGH; + case ISD::SETUGE: + SSECC = 5; + break; + case ISD::SETULT: + Swap = true; + LLVM_FALLTHROUGH; + case ISD::SETUGT: + SSECC = 6; + break; + case ISD::SETO: + SSECC = 7; + break; + case ISD::SETUEQ: + SSECC = 8; + break; + case ISD::SETONE: + SSECC = 12; + break; } if (Swap) std::swap(Op0, Op1); @@ -19806,7 +19873,7 @@ // Issue the operation on the smaller types and concatenate the result back MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); @@ -19975,10 +20042,10 @@ CombineOpc = X86ISD::FAND; } - SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, dl, MVT::i8)); - SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, dl, MVT::i8)); + SDValue Cmp0 = + DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC0, dl, MVT::i8)); + SDValue Cmp1 = + DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC1, dl, MVT::i8)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. @@ -20025,17 +20092,30 @@ // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; switch (Cond) { - default: llvm_unreachable("Unexpected SETCC condition"); + default: + llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: - case ISD::SETLT: CmpMode = 0x00; break; + case ISD::SETLT: + CmpMode = 0x00; + break; case ISD::SETULE: - case ISD::SETLE: CmpMode = 0x01; break; + case ISD::SETLE: + CmpMode = 0x01; + break; case ISD::SETUGT: - case ISD::SETGT: CmpMode = 0x02; break; + case ISD::SETGT: + CmpMode = 0x02; + break; case ISD::SETUGE: - case ISD::SETGE: CmpMode = 0x03; break; - case ISD::SETEQ: CmpMode = 0x04; break; - case ISD::SETNE: CmpMode = 0x05; break; + case ISD::SETGE: + CmpMode = 0x03; + break; + case ISD::SETEQ: + CmpMode = 0x04; + break; + case ISD::SETNE: + CmpMode = 0x05; + break; } // Are we comparing unsigned or signed integers? 
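// Editorial sketch (not part of the patch): the LowerAndToBT hunk above rewrites
// "(x & Mask) ==/!= 0" with a power-of-two Mask into a single-bit test (X86 BT,
// which copies the selected bit into CF, answered via COND_AE/COND_B). A
// standalone illustration of the scalar equivalence it relies on -- plain C++,
// not LLVM code:
#include <cassert>
#include <cstdint>

static bool testBitViaAnd(uint64_t X, unsigned BitNo) {
  return (X & (uint64_t(1) << BitNo)) != 0; // the pattern before the combine
}

static bool testBitViaBT(uint64_t X, unsigned BitNo) {
  return ((X >> BitNo) & 1) != 0;           // what BT computes (selected bit -> CF)
}

int main() {
  for (unsigned Bit = 0; Bit != 64; ++Bit)
    for (uint64_t X : {uint64_t(0), uint64_t(1), uint64_t(0x8000000000000000ULL),
                       uint64_t(0xDEADBEEFCAFEF00DULL)})
      assert(testBitViaAnd(X, Bit) == testBitViaBT(X, Bit));
  return 0;
}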
@@ -20131,11 +20211,20 @@ bool Invert = false; unsigned Opc; switch (Cond) { - default: llvm_unreachable("Unexpected condition code"); - case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Opc = ISD::UMIN; break; - case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: Opc = ISD::UMAX; break; + default: + llvm_unreachable("Unexpected condition code"); + case ISD::SETUGT: + Invert = true; + LLVM_FALLTHROUGH; + case ISD::SETULE: + Opc = ISD::UMIN; + break; + case ISD::SETULT: + Invert = true; + LLVM_FALLTHROUGH; + case ISD::SETUGE: + Opc = ISD::UMAX; + break; } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -20157,10 +20246,10 @@ // operations may be required for some comparisons. unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ : X86ISD::PCMPGT; - bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || - Cond == ISD::SETGE || Cond == ISD::SETUGE; - bool Invert = Cond == ISD::SETNE || - (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); + bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || Cond == ISD::SETGE || + Cond == ISD::SETUGE; + bool Invert = + Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); if (Swap) std::swap(Op0, Op1); @@ -20192,8 +20281,8 @@ SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); // Create masks for only the low parts/high parts of the 64 bit integers. - static const int MaskHi[] = { 1, 1, 3, 3 }; - static const int MaskLo[] = { 0, 0, 2, 2 }; + static const int MaskHi[] = {1, 1, 3, 3}; + static const int MaskLo[] = {0, 0, 2, 2}; SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); @@ -20220,7 +20309,7 @@ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. - static const int Mask[] = { 1, 0, 3, 2 }; + static const int Mask[] = {1, 0, 3, 2}; SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); @@ -20235,8 +20324,8 @@ // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, - VT); + SDValue SM = + DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } @@ -20253,8 +20342,7 @@ // Try to select this as a KORTEST+SETCC if possible. static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue &X86CC) { + const X86Subtarget &Subtarget, SDValue &X86CC) { // Only support equality comparisons. 
if (CC != ISD::SETEQ && CC != ISD::SETNE) return SDValue(); @@ -20354,7 +20442,8 @@ MVT VT = Op.getSimpleValueType(); - if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); + if (VT.isVector()) + return LowerVSETCC(Op, Subtarget, DAG); assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -20370,7 +20459,8 @@ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); } -SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); @@ -20383,8 +20473,8 @@ // Recreate the carry if needed. EVT CarryVT = Carry.getValueType(); APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), - Carry, DAG.getConstant(NegOne, DL, CarryVT)); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry, + DAG.getConstant(NegOne, DL, CarryVT)); SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); @@ -20404,7 +20494,8 @@ unsigned BaseOp = 0; SDLoc DL(Op); switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown ovf instruction!"); + default: + llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: BaseOp = X86ISD::ADD; Cond = X86::COND_O; @@ -20478,12 +20569,13 @@ SDValue VOp0 = V.getOperand(0); unsigned InBits = VOp0.getValueSizeInBits(); unsigned Bits = V.getValueSizeInBits(); - return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); + return DAG.MaskedValueIsZero(VOp0, + APInt::getHighBitsSet(InBits, InBits - Bits)); } SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool AddTest = true; - SDValue Cond = Op.getOperand(0); + SDValue Cond = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); @@ -20538,8 +20630,8 @@ SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - VSel, DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, + DAG.getIntPtrConstant(0, DL)); } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); @@ -20628,14 +20720,14 @@ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, CmpZero); } - Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, - CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); + Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, + DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); - SDValue Res = // Res = 0 or -1. - DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); + SDValue Res = // Res = 0 or -1. 
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); @@ -20670,7 +20762,8 @@ if (CmpSz > VT.getSizeInBits()) Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); else if (CmpSz < VT.getSizeInBits()) - Neg = DAG.getNode(ISD::AND, DL, VT, + Neg = DAG.getNode( + ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), DAG.getConstant(1, DL, VT)); else @@ -20692,8 +20785,7 @@ // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -20701,7 +20793,7 @@ bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && - !isScalarFPTypeInSSEReg(VT)) // FPStack? + !isScalarFPTypeInSSEReg(VT)) // FPStack? IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || @@ -20754,9 +20846,9 @@ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - Cond); + SDValue Res = + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; @@ -20766,14 +20858,15 @@ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate // widen the cmov and push the truncate through. This avoids introducing a new // branch during isel and doesn't add any extensions. - if (Op.getValueType() == MVT::i8 && - Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { + if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE && + Op2.getOpcode() == ISD::TRUNCATE) { SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); if (T1.getValueType() == T2.getValueType() && // Blacklist CopyFromReg to avoid partial register stalls. - T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ - SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, - CC, Cond); + T1.getOpcode() != ISD::CopyFromReg && + T2.getOpcode() != ISD::CopyFromReg) { + SDValue Cmov = + DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } @@ -20789,19 +20882,18 @@ !MayFoldLoad(Op2))) { Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); - SDValue Ops[] = { Op2, Op1, CC, Cond }; + SDValue Ops[] = {Op2, Op1, CC, Cond}; SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
- SDValue Ops[] = { Op2, Op1, CC, Cond }; + SDValue Ops[] = {Op2, Op1, CC, Cond}; return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } -static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, - const X86Subtarget &Subtarget, +static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); @@ -20827,8 +20919,8 @@ if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { NumElts *= 512 / ExtVT.getSizeInBits(); InVT = MVT::getVectorVT(MVT::i1, NumElts); - In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), - In, DAG.getIntPtrConstant(0, dl)); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), In, + DAG.getIntPtrConstant(0, dl)); WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); } @@ -20907,9 +20999,9 @@ InVT = In.getSimpleValueType(); } - // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results, - // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still - // need to be handled here for 256/512-bit results. + // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit + // results, so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* + // instructions still need to be handled here for 256/512-bit results. if (Subtarget.hasInt256()) { assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension"); @@ -20918,9 +21010,8 @@ // FIXME: Apparently we create inreg operations that could be regular // extends. - unsigned ExtOpc = - Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; + unsigned ExtOpc = Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, VT, In); } @@ -21012,8 +21103,8 @@ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) return SDValue(); - In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), - MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8)); + In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MVT::v16i8, In, + DAG.getUNDEF(MVT::v8i8)); return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In); } @@ -21035,9 +21126,9 @@ SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In); unsigned NumElems = InVT.getVectorNumElements(); - SmallVector ShufMask(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask[i] = i + NumElems/2; + SmallVector ShufMask(NumElems, -1); + for (unsigned i = 0; i != NumElems / 2; ++i) + ShufMask[i] = i + NumElems / 2; SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi); @@ -21123,7 +21214,7 @@ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && "Unexpected VT"); if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) != - TargetLowering::TypeWidenVector) + TargetLowering::TypeWidenVector) return SDValue(); // Widen the vector, cast to a v2x64 type, extract the single 64-bit element @@ -21151,11 +21242,10 @@ // TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. 
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector loads."); - assert(RegVT.isInteger() && - "We only custom lower integer vector loads."); + assert(RegVT.isInteger() && "We only custom lower integer vector loads."); LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); @@ -21190,8 +21280,8 @@ ISD::LoadExtType Ext = Ld->getExtensionType(); - assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) - && "Only anyext and sext are currently implemented."); + assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) && + "Only anyext and sext are currently implemented."); assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -21283,9 +21373,8 @@ // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegSize / MemVT.getScalarSizeInBits()); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + loadRegSize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -21297,8 +21386,8 @@ SmallVector Chains; SDValue Ptr = Ld->getBasePtr(); unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8; - SDValue Increment = DAG.getConstant(OffsetInc, dl, - TLI.getPointerTy(DAG.getDataLayout())); + SDValue Increment = + DAG.getConstant(OffsetInc, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); unsigned Offset = 0; @@ -21306,10 +21395,9 @@ unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset); // Perform a single load. - SDValue ScalarLoad = - DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(Offset), - NewAlign, Ld->getMemOperand()->getFlags()); + SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(Offset), + NewAlign, Ld->getMemOperand()->getFlags()); Chains.push_back(ScalarLoad.getValue(1)); // Create the first element type using SCALAR_TO_VECTOR in order to avoid // another round of DAGCombining. @@ -21331,13 +21419,13 @@ unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { - SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG); + SDValue Sext = getExtendInVec(/*Signed*/ true, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && MemVT == MVT::v8i8) { - SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG); + SDValue Sext = getExtendInVec(/*Signed*/ false, dl, RegVT, SlicedVec, DAG); return DAG.getMergeValues({Sext, TF}, dl); } @@ -21380,8 +21468,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); - SDValue Cond = Op.getOperand(1); - SDValue Dest = Op.getOperand(2); + SDValue Cond = Op.getOperand(1); + SDValue Dest = Op.getOperand(2); SDLoc dl(Op); SDValue CC; bool Inverted = false; @@ -21422,8 +21510,7 @@ // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -21434,7 +21521,8 @@ addTest = false; } else { switch (cast(CC)->getZExtValue()) { - default: break; + default: + break; case X86::COND_O: case X86::COND_B: // These can only come from an arithmetic instruction with overflow, @@ -21466,11 +21554,10 @@ // Also, recognize the pattern generated by an FCMP_UNE. We can emit // two branches instead of an explicit OR instruction with a // separate test. - if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Cmp)) { + if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp)) { CC = Cond.getOperand(0).getOperand(0); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); CC = Cond.getOperand(1).getOperand(0); Cond = Cmp; addTest = false; @@ -21481,11 +21568,10 @@ // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. - if (Cmp == Cond.getOperand(1).getOperand(1) && - isX86LogicalCmp(Cmp) && + if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); @@ -21495,15 +21581,15 @@ if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cmp; @@ -21516,7 +21602,7 @@ // It should be transformed during dag combiner except when the condition // is set by a arithmetics with overflow node. 
X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); @@ -21536,7 +21622,7 @@ if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; @@ -21545,8 +21631,8 @@ Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; @@ -21557,12 +21643,12 @@ // For FCMP_UNE, we can emit // two branches instead of an explicit OR instruction with a // separate test. - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), + Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, + CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; @@ -21572,7 +21658,7 @@ if (addTest) { // Look pass the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) - Cond = Cond.getOperand(0); + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -21589,12 +21675,12 @@ if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; CC = DAG.getConstant(X86Cond, dl, MVT::i8); - Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), - X86Cond, dl, DAG); + Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), X86Cond, + dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); - return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cond); + return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, + Cond); } // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. @@ -21602,9 +21688,8 @@ // bytes in one go. Touching the stack at 4K increments is necessary to ensure // that the guard pages used by the OS virtual memory manager are allocated in // correct sequence. -SDValue -X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool EmitStackProbe = !getStackProbeSymbolName(MF).empty(); @@ -21615,7 +21700,7 @@ // Get the inputs. 
SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); + SDValue Size = Op.getOperand(1); unsigned Align = cast(Op.getOperand(2))->getZExtValue(); EVT VT = Node->getValueType(0); @@ -21640,7 +21725,7 @@ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Result = DAG.getNode(ISD::AND, dl, VT, Result, - DAG.getConstant(-(uint64_t)Align, dl, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -21660,7 +21745,7 @@ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, - DAG.getRegister(Vreg, SPTy)); + DAG.getRegister(Vreg, SPTy)); } else { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size); @@ -21734,8 +21819,9 @@ MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( - Subtarget.isTarget64BitLP64() ? 8 : 4, DL)); + FIN = DAG.getNode( + ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore( Op.getOperand(0), DL, RSFIN, FIN, @@ -21745,8 +21831,7 @@ } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget.is64Bit() && - "LowerVAARG only handles 64-bit va_arg!"); + assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert(Op.getNumOperands() == 4); MachineFunction &MF = DAG.getMachineFunction(); @@ -21771,9 +21856,9 @@ if (ArgVT == MVT::f80) { llvm_unreachable("va_arg for f80 not yet implemented"); } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) { - ArgMode = 2; // Argument passed in XMM register. Use fp_offset. + ArgMode = 2; // Argument passed in XMM register. Use fp_offset. } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) { - ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. + ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. } else { llvm_unreachable("Unhandled argument type in LowerVAARG"); } @@ -21792,11 +21877,8 @@ DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode( - X86ISD::VAARG_64, dl, - VTs, InstOps, MVT::i64, - MachinePointerInfo(SV), - /*Align=*/0, - MachineMemOperand::MOLoad | MachineMemOperand::MOStore); + X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), + /*Align=*/0, MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); // Load the next argument and return it @@ -21809,7 +21891,7 @@ // where a va_list is still an i8*. assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); if (Subtarget.isCallingConvWin64( - DAG.getMachineFunction().getFunction().getCallingConv())) + DAG.getMachineFunction().getFunction().getCallingConv())) // Probably a Win64 va_copy. 
return DAG.expandVACopy(Op.getNode()); @@ -21820,9 +21902,8 @@ const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, - false, false, + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL), + 8, /*isVolatile*/ false, false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -21869,8 +21950,9 @@ return DAG.getConstant(0, dl, VT); } - assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) - && "Unknown target vector shift-by-constant node"); + assert( + (Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) && + "Unknown target vector shift-by-constant node"); // Fold this packed vector shift into a build vector if SrcOp is a // vector of Constants or UNDEFs. @@ -21879,10 +21961,11 @@ unsigned NumElts = SrcOp->getNumOperands(); ConstantSDNode *ND; - switch(Opc) { - default: llvm_unreachable("Unknown opcode!"); + switch (Opc) { + default: + llvm_unreachable("Unknown opcode!"); case X86ISD::VSHLI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21894,7 +21977,7 @@ } break; case X86ISD::VSRLI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21906,7 +21989,7 @@ } break; case X86ISD::VSRAI: - for (unsigned i=0; i!=NumElts; ++i) { + for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { Elts.push_back(CurrentOp); @@ -21979,8 +22062,8 @@ } else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); - ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), - MVT::v2i64, ShAmt); + ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, + ShAmt); } else { SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; @@ -22015,17 +22098,17 @@ // In case 32bit mode, bitcast i64 is illegal, extend/split it. SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(0, dl, MVT::i32)); + DAG.getConstant(0, dl, MVT::i32)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(1, dl, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); Lo = DAG.getBitcast(MVT::v32i1, Lo); Hi = DAG.getBitcast(MVT::v32i1, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); } else { - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); + MVT BitcastVT = + MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. 
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, @@ -22038,9 +22121,9 @@ /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + SDValue PreservedSrc, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); unsigned OpcodeSelect = ISD::VSELECT; @@ -22096,9 +22179,12 @@ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See // WinEHStatePass for the full struct definition. switch (classifyEHPersonality(Fn->getPersonalityFn())) { - case EHPersonality::MSVC_X86SEH: return 24; - case EHPersonality::MSVC_CXX: return 16; - default: break; + case EHPersonality::MSVC_X86SEH: + return 24; + case EHPersonality::MSVC_CXX: + return 16; + default: + break; } report_fatal_error( "can only recover FP for 32-bit MSVC EH personality functions"); @@ -22184,9 +22270,9 @@ SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); MVT VT = Op.getSimpleValueType(); - const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); + const IntrinsicData *IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { - switch(IntrData->Type) { + switch (IntrData->Type) { case INTR_TYPE_1OP: { // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, @@ -22202,7 +22288,8 @@ if (!isRoundModeCurDirection(Rnd)) return SDValue(); } - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1)); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1)); } case INTR_TYPE_1OP_SAE: { SDValue Sae = Op.getOperand(2); @@ -22276,12 +22363,13 @@ return SDValue(); } - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), - Src1, Src2, Src3); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, + Src3); } case INTR_TYPE_4OP: - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -22318,8 +22406,8 @@ else return SDValue(); - return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), - Mask, PassThru, Subtarget, DAG); + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, + Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); @@ -22343,9 +22431,9 @@ if (!isRoundModeCurDirection(Rnd)) return SDValue(); } - return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, - Src2), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode( + DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, + Subtarget, DAG); } assert(Op.getNumOperands() == (6U + HasRounding) && @@ -22359,9 +22447,9 @@ else if (!isRoundModeCurDirection(Sae)) return SDValue(); } - return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, - Src2, RoundingMode), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode( + DAG.getNode(Opc, dl, VT, Src1, Src2, RoundingMode), Mask, passThru, + Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RND: { SDValue Src1 = Op.getOperand(1); 
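// Editorial sketch (not part of the patch): getVectorMaskingNode /
// getScalarMaskingNode above wrap a masked AVX-512 intrinsic's result in
// "vselect(mask, result, passthru)". A scalar model of that per-lane select --
// standalone illustration with made-up names, not LLVM API:
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T, std::size_t N>
std::array<T, N> applyMask(uint32_t Mask, const std::array<T, N> &Result,
                           const std::array<T, N> &PassThru) {
  std::array<T, N> Out{};
  for (std::size_t I = 0; I != N; ++I) // one k-register bit per vector lane
    Out[I] = ((Mask >> I) & 1) ? Result[I] : PassThru[I];
  return Out;
}

int main() {
  std::array<int, 4> Res{1, 2, 3, 4}, Pass{9, 9, 9, 9};
  std::array<int, 4> Out = applyMask(/*Mask=*/0b0101u, Res, Pass);
  assert(Out[0] == 1 && Out[1] == 9 && Out[2] == 3 && Out[3] == 9);
  return 0;
}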
@@ -22396,8 +22484,8 @@ else return SDValue(); - return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), - Mask, passThru, Subtarget, DAG); + return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask, + passThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: { SDValue Src1 = Op.getOperand(1); @@ -22433,8 +22521,8 @@ return SDValue(); } - return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), - Mask, PassThru, Subtarget, DAG); + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask, + PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_SCALAR_MASK_SAE: { SDValue Src1 = Op.getOperand(1); @@ -22483,12 +22571,12 @@ // Reverse the operands to match VSELECT order. return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); } - case VPERM_2OP : { + case VPERM_2OP: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); // Swap Src1 and Src2 in the node creation - return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); + return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1); } case IFMA_OP: // NOTE: We need to swizzle the operands to pass the multiply operands @@ -22500,13 +22588,13 @@ SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); - SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), - Subtarget, DAG); + SDValue FPclassMask = + getScalarMaskingNode(FPclass, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, - DAG.getConstant(0, dl, MVT::v8i1), - FPclassMask, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v8i1), FPclassMask, + DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } @@ -22525,9 +22613,9 @@ if (!isRoundModeCurDirection(Sae)) return SDValue(); } - //default rounding mode + // default rounding mode return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), CC); + Op.getOperand(2), CC); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); @@ -22543,17 +22631,17 @@ else if (!isRoundModeCurDirection(Sae)) return SDValue(); } - //default rounding mode + // default rounding mode if (!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); - SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), - Subtarget, DAG); + SDValue CmpMask = + getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, - DAG.getConstant(0, dl, MVT::v8i1), - CmpMask, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v8i1), CmpMask, + DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(MVT::i8, Ins); } case COMI: { // Comparison intrinsics @@ -22612,8 +22700,8 @@ // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. 
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, - DAG.getConstant(0, dl, MVT::v16i1), - FCmp, DAG.getIntPtrConstant(0, dl)); + DAG.getConstant(0, dl, MVT::v16i1), FCmp, + DAG.getIntPtrConstant(0, dl)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, DAG.getBitcast(MVT::i16, Ins)); } @@ -22666,9 +22754,9 @@ assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(2), - DAG.getConstant(0xf, dl, MVT::i32)); + SDValue RoundingMode = + DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(2), + DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } @@ -22676,9 +22764,9 @@ assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(3), - DAG.getConstant(0xf, dl, MVT::i32)); + SDValue RoundingMode = + DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(3), + DAG.getConstant(0xf, dl, MVT::i32)); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } @@ -22700,7 +22788,7 @@ Op.getOperand(3), GenCF.getValue(1)); } SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); - SDValue Results[] = { SetCC, Res }; + SDValue Results[] = {SetCC, Res}; return DAG.getMergeValues(Results, dl); } case CVTPD2PS_MASK: @@ -22734,7 +22822,6 @@ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, PassThru, Mask); - } case CVTNEPS2BF16_MASK: { SDValue Src = Op.getOperand(1); @@ -22757,7 +22844,8 @@ } switch (IntNo) { - default: return SDValue(); // Don't custom lower most intrinsics. + default: + return SDValue(); // Don't custom lower most intrinsics. // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -22791,7 +22879,8 @@ unsigned TestOpc = X86ISD::PTEST; X86::CondCode X86CC; switch (IntNo) { - default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + default: + llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx512_ktestc_b: case Intrinsic::x86_avx512_ktestc_w: case Intrinsic::x86_avx512_ktestc_d: @@ -22862,7 +22951,8 @@ unsigned Opcode; X86::CondCode X86CC; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + default: + llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::x86_sse42_pcmpistria128: Opcode = X86ISD::PCMPISTR; X86CC = X86::COND_A; @@ -22904,7 +22994,7 @@ X86CC = X86::COND_E; break; } - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); @@ -22919,7 +23009,7 @@ else Opcode = X86ISD::PCMPESTR; - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps); } @@ -22932,7 +23022,7 @@ else Opcode = X86ISD::PCMPESTR; - SmallVector NewOps(Op->op_begin()+1, Op->op_end()); + SmallVector NewOps(Op->op_begin() + 1, Op->op_end()); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); } @@ -23002,14 +23092,13 @@ SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); SDLoc DL(Op); - SDValue Operation = - DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, - Op->getOperand(1), Op->getOperand(2)); - - SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, - MaskVT, Operation); - SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, - MaskVT, Operation); + SDValue Operation = DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, + Op->getOperand(1), Op->getOperand(2)); + + SDValue Result0 = + DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, MaskVT, Operation); + SDValue Result1 = + DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, MaskVT, Operation); return DAG.getMergeValues({Result0, Result1}, DL); } } @@ -23035,15 +23124,15 @@ MemIntrinsicSDNode *MemIntr = cast(Op); - SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; + SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; SDValue Res = DAG.getTargetMemSDNode( - VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); - return DAG.getMergeValues({ Res, Res.getValue(2) }, dl); + VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); + return DAG.getMergeValues({Res, Res.getValue(2)}, dl); } -static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, - SDValue Src, SDValue Mask, SDValue Base, - SDValue Index, SDValue ScaleOp, SDValue Chain, +static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src, + SDValue Mask, SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -23070,16 +23159,16 @@ MemIntrinsicSDNode *MemIntr = cast(Op); - SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; + SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; SDValue Res = DAG.getTargetMemSDNode( - VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); - return DAG.getMergeValues({ Res, Res.getValue(2) }, dl); + VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); + return DAG.getMergeValues({Res, Res.getValue(2)}, dl); } static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, - SDValue Src, SDValue Mask, SDValue Base, - SDValue Index, SDValue ScaleOp, SDValue Chain, - const X86Subtarget &Subtarget) { + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain, + const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = dyn_cast(ScaleOp); // Scale must be constant. 
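// Illustrative sketch, not from this patch: user-level shape of the AVX2
// gathers that getAVX2GatherNode above handles. The scale operand has to be
// an immediate 1/2/4/8, which is why the lowering insists on a constant
// ScaleOp before building the node.
#include <immintrin.h>
static __m256i gather_i32_sketch(const int *Base, __m256i Index, __m256i Mask,
                                 __m256i Src) {
  // Loads Base[Index[i]] for lanes whose mask sign bit is set; all other
  // lanes keep the corresponding element of Src.
  return _mm256_mask_i32gather_epi32(Src, Base, Index, Mask, /*scale*/ 4);
}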
@@ -23116,8 +23205,8 @@ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - MVT MaskVT = - MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT( + MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); @@ -23133,11 +23222,11 @@ /// expanded intrinsics implicitly defines extra registers (i.e. not just /// EDX:EAX). static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, - SelectionDAG &DAG, - unsigned TargetOpcode, - unsigned SrcReg, - const X86Subtarget &Subtarget, - SmallVectorImpl &Results) { + SelectionDAG &DAG, + unsigned TargetOpcode, + unsigned SrcReg, + const X86Subtarget &Subtarget, + SmallVectorImpl &Results) { SDValue Chain = N->getOperand(0); SDValue Glue; @@ -23177,7 +23266,7 @@ } // Use a buildpair to merge the two 32-bit values into a 64-bit one. - SDValue Ops[] = { LO, HI }; + SDValue Ops[] = {LO, HI}; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); @@ -23194,9 +23283,9 @@ // The processor's time-stamp counter (a 64-bit MSR) is stored into the // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR // and the EAX register is loaded with the low-order 32 bits. - SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode, - /* NoRegister */0, Subtarget, - Results); + SDValue Glue = + expandIntrinsicWChainHelper(N, DL, DAG, Opcode, + /* NoRegister */ 0, Subtarget, Results); if (Opcode != X86::RDTSCP) return; @@ -23254,30 +23343,33 @@ } /// Emit Truncating Store with signed or unsigned saturation. -static SDValue -EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, - SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, - SelectionDAG &DAG) { +static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, + SDValue Val, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO, SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); - SDValue Ops[] = { Chain, Val, Ptr, Undef }; - return SignedSat ? - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO) : - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO); + SDValue Ops[] = {Chain, Val, Ptr, Undef}; + return SignedSat ? DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO) + : DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO); } /// Emit Masked Truncating Store with signed or unsigned saturation. -static SDValue -EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, - SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, SelectionDAG &DAG) { +static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, + const SDLoc &Dl, SDValue Val, SDValue Ptr, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + SelectionDAG &DAG) { SDVTList VTs = DAG.getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; - return SignedSat ? - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO) : - DAG.getTargetMemSDNode(VTs, Ops, Dl, MemVT, MMO); + SDValue Ops[] = {Chain, Val, Ptr, Mask}; + return SignedSat + ? 
DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO) + : DAG.getTargetMemSDNode(VTs, Ops, Dl, + MemVT, MMO); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, @@ -23302,9 +23394,8 @@ SDLoc dl(Op); // Create a WRPKRU node, pass the input to the EAX parameter, and pass 0 // to the EDX and ECX parameters. - return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(2), - DAG.getConstant(0, dl, MVT::i32), + return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(2), DAG.getConstant(0, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); } case llvm::Intrinsic::x86_flags_read_u32: @@ -23329,7 +23420,8 @@ unsigned Opcode; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); + default: + llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_umwait: Opcode = X86ISD::UMWAIT; break; @@ -23342,9 +23434,8 @@ break; } - SDValue Operation = - DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), - Op->getOperand(3), Op->getOperand(4)); + SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), + Op->getOperand(3), Op->getOperand(4)); SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); @@ -23356,7 +23447,8 @@ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); unsigned Opcode; switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic!"); + default: + llvm_unreachable("Impossible intrinsic!"); case Intrinsic::x86_enqcmd: Opcode = X86ISD::ENQCMD; break; @@ -23375,8 +23467,9 @@ } SDLoc dl(Op); - switch(IntrData->Type) { - default: llvm_unreachable("Unknown Intrinsic Type"); + switch (IntrData->Type) { + default: + llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -23385,10 +23478,10 @@ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. - SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), - DAG.getConstant(1, dl, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, dl, MVT::i8), - SDValue(Result.getNode(), 1) }; + SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), + DAG.getConstant(1, dl, Op->getValueType(1)), + DAG.getConstant(X86::COND_B, dl, MVT::i8), + SDValue(Result.getNode(), 1)}; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. 
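// Illustrative sketch, not from this patch: source-level counterpart of the
// RDRAND/RDSEED lowering above, which pairs the random value with a validity
// bit derived from the carry flag (the CMOV on X86::COND_B).
#include <immintrin.h>
static int try_rdrand32(unsigned *Out) {
  // Returns 1 and stores a random value when the hardware RNG succeeded
  // (CF = 1); returns 0, with *Out left as 0, when no entropy was available.
  return _rdrand32_step(Out);
}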
@@ -23397,32 +23490,32 @@ } case GATHER_AVX2: { SDValue Chain = Op.getOperand(0); - SDValue Src = Op.getOperand(2); - SDValue Base = Op.getOperand(3); + SDValue Src = Op.getOperand(2); + SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Mask = Op.getOperand(5); + SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case GATHER: { - //gather(v1, mask, index, base, scale); + // gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); - SDValue Src = Op.getOperand(2); - SDValue Base = Op.getOperand(3); + SDValue Src = Op.getOperand(2); + SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Mask = Op.getOperand(5); + SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, - Chain, Subtarget); + return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, Chain, + Subtarget); } case SCATTER: { - //scatter(base, mask, index, v1, scale); + // scatter(base, mask, index, v1, scale); SDValue Chain = Op.getOperand(0); - SDValue Base = Op.getOperand(2); - SDValue Mask = Op.getOperand(3); + SDValue Base = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Src = Op.getOperand(5); + SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); @@ -23434,9 +23527,9 @@ "Wrong prefetch hint in intrinsic: should be 2 or 3"); unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); - SDValue Mask = Op.getOperand(2); + SDValue Mask = Op.getOperand(2); SDValue Index = Op.getOperand(3); - SDValue Base = Op.getOperand(4); + SDValue Base = Op.getOperand(4); SDValue Scale = Op.getOperand(5); return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain, Subtarget); @@ -23468,8 +23561,8 @@ SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG); SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); - return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), - Ret, SDValue(InTrans.getNode(), 1)); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret, + SDValue(InTrans.getNode(), 1)); } case TRUNCATE_TO_MEM_VI8: case TRUNCATE_TO_MEM_VI16: @@ -23482,7 +23575,7 @@ MemIntrinsicSDNode *MemIntr = dyn_cast(Op); assert(MemIntr && "Expected MemIntrinsicSDNode!"); - EVT MemVT = MemIntr->getMemoryVT(); + EVT MemVT = MemIntr->getMemoryVT(); uint16_t TruncationOp = IntrData->Opc0; switch (TruncationOp) { @@ -23495,7 +23588,8 @@ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, - MemIntr->getMemOperand(), true /* truncating */); + MemIntr->getMemOperand(), + true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { @@ -23576,7 +23670,7 @@ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); - SDLoc dl(Op); // FIXME probably not meaningful + SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && @@ -23590,17 +23684,17 @@ // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, +unsigned X86TargetLowering::getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const MachineFunction &MF = DAG.getMachineFunction(); unsigned Reg = StringSwitch(RegName) - .Case("esp", X86::ESP) - .Case("rsp", X86::RSP) - .Case("ebp", X86::EBP) - .Case("rbp", X86::RBP) - .Default(0); + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Case("ebp", X86::EBP) + .Case("rbp", X86::RBP) + .Default(0); if (Reg == X86::EBP || Reg == X86::RBP) { if (!TFI.hasFP(MF)) @@ -23649,10 +23743,10 @@ } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - SDValue Offset = Op.getOperand(1); - SDValue Handler = Op.getOperand(2); - SDLoc dl (Op); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -23663,9 +23757,9 @@ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize(), - dl)); + SDValue StoreAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl)); StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo()); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); @@ -23688,19 +23782,20 @@ (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); } return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, - DAG.getVTList(MVT::i32, MVT::Other), - Op.getOperand(0), Op.getOperand(1)); + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); } SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); } -SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, - SelectionDAG &DAG) const { +SDValue +X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, Op.getOperand(0)); @@ -23716,7 +23811,7 @@ SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value - SDLoc dl (Op); + SDLoc dl(Op); const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -23725,7 +23820,7 @@ SDValue OutChains[6]; // Large code-model. - const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. + const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. 
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; @@ -23775,7 +23870,7 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } else { const Function *Func = - cast(cast(Op.getOperand(5))->getValue()); + cast(cast(Op.getOperand(5))->getValue()); CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; @@ -23797,7 +23892,8 @@ unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I, ++Idx) + E = FTy->param_end(); + I != E; ++I, ++Idx) if (Attrs.hasAttribute(Idx, Attribute::InReg)) { auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. @@ -23893,36 +23989,34 @@ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, 2, 2); - SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; - SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, - DAG.getVTList(MVT::Other), - Ops, MVT::i16, MMO); + SDValue Ops[] = {DAG.getEntryNode(), StackSlot}; + SDValue Chain = DAG.getMemIntrinsicNode( + X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); // Transform as necessary - SDValue CWD1 = - DAG.getNode(ISD::SRL, DL, MVT::i16, - DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x800, DL, MVT::i16)), - DAG.getConstant(11, DL, MVT::i8)); - SDValue CWD2 = - DAG.getNode(ISD::SRL, DL, MVT::i16, - DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x400, DL, MVT::i16)), - DAG.getConstant(9, DL, MVT::i8)); + SDValue CWD1 = DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, + DAG.getConstant(0x800, DL, MVT::i16)), + DAG.getConstant(11, DL, MVT::i8)); + SDValue CWD2 = DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, + DAG.getConstant(0x400, DL, MVT::i16)), + DAG.getConstant(9, DL, MVT::i8)); SDValue RetVal = - DAG.getNode(ISD::AND, DL, MVT::i16, - DAG.getNode(ISD::ADD, DL, MVT::i16, - DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), - DAG.getConstant(1, DL, MVT::i16)), - DAG.getConstant(3, DL, MVT::i16)); - - return DAG.getNode((VT.getSizeInBits() < 16 ? - ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); + DAG.getNode(ISD::AND, DL, MVT::i16, + DAG.getNode(ISD::ADD, DL, MVT::i16, + DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), + DAG.getConstant(1, DL, MVT::i16)), + DAG.getConstant(3, DL, MVT::i16)); + + return DAG.getNode( + (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, + RetVal); } // Split an unary integer op into 2 half sized ops. @@ -23976,17 +24070,15 @@ MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - assert((EltVT == MVT::i8 || EltVT == MVT::i16) && - "Unsupported element type"); + assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type"); // Split vector, it's Lo and Hi parts will be handled in next iteration. - if (NumElems > 16 || - (NumElems == 16 && !Subtarget.canExtendTo512DQ())) + if (NumElems > 16 || (NumElems == 16 && !Subtarget.canExtendTo512DQ())) return LowerVectorIntUnary(Op, DAG); MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && - "Unsupported value type for operation"); + "Unsupported value type for operation"); // Use native supported vector instruction vplzcntd. 
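// Illustrative sketch, not from this patch: scalar form of the FLT_ROUNDS
// lowering above (the FNSTCW16m / control-word code). It maps the x87
// rounding-control field (FPCW bits 10-11) onto the C FLT_ROUNDS encoding
// 0 = toward zero, 1 = to nearest, 2 = upward, 3 = downward.
static unsigned fltRoundsFromFPCW(unsigned CWD) {
  unsigned Hi = (CWD & 0x800) >> 11; // RC bit 11 -> bit 0
  unsigned Lo = (CWD & 0x400) >> 9;  // RC bit 10 -> bit 1
  // RC 00 -> 1 (nearest), 01 -> 3 (down), 10 -> 2 (up), 11 -> 0 (to zero).
  return ((Hi | Lo) + 1) & 3;
}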
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); @@ -24126,12 +24218,8 @@ if (Opc == ISD::CTLZ) { // If src is zero (i.e. bsr sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1)}; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); } @@ -24159,12 +24247,8 @@ Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); // If src is zero (i.e. bsf sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits, dl, VT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1)}; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } @@ -24190,7 +24274,7 @@ SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), @@ -24219,7 +24303,7 @@ SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl); MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), @@ -24233,8 +24317,8 @@ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); if (VT.getScalarType() == MVT::i1) - return DAG.getNode(ISD::XOR, SDLoc(Op), VT, - Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode(ISD::XOR, SDLoc(Op), VT, Op.getOperand(0), + Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && @@ -24250,7 +24334,8 @@ if (VT.getScalarType() == MVT::i1) { SDLoc dl(Op); switch (Opcode) { - default: llvm_unreachable("Expected saturated arithmetic opcode"); + default: + llvm_unreachable("Expected saturated arithmetic opcode"); case ISD::UADDSAT: case ISD::SADDSAT: // *addsat i1 X, Y --> X | Y @@ -24265,8 +24350,8 @@ if (VT.is128BitVector()) { // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), VT); + EVT SetCCResultType = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDLoc DL(Op); if (Opcode == ISD::UADDSAT && !TLI.isOperationLegal(ISD::UMIN, VT)) { // uaddsat X, Y --> (X >u (X + Y)) ? -1 : X + Y @@ -24352,11 +24437,20 @@ // Else, expand to a compare/select. 
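// Illustrative sketch, not from this patch: scalar shape of the CTLZ
// lowering above for a 32-bit operand. BSR leaves its destination undefined
// and sets ZF for a zero input, so the CMOV substitutes 2*NumBits-1 (63);
// the lowering later XORs with NumBits-1 (31), turning the bit index into a
// leading-zero count (and 63 ^ 31 == 32 covers the zero input).
static unsigned ctlz32ViaBSR(unsigned X) {
  unsigned Idx = 63; // value the CMOV selects when ZF is set
  if (X != 0)
    for (Idx = 31; (X >> Idx) == 0; --Idx) { // BSR: index of highest set bit
    }
  return Idx ^ 31; // 31 - Idx for nonzero X, 32 for X == 0
}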
ISD::CondCode CC; switch (Opcode) { - case ISD::SMIN: CC = ISD::CondCode::SETLT; break; - case ISD::SMAX: CC = ISD::CondCode::SETGT; break; - case ISD::UMIN: CC = ISD::CondCode::SETULT; break; - case ISD::UMAX: CC = ISD::CondCode::SETUGT; break; - default: llvm_unreachable("Unknown MINMAX opcode"); + case ISD::SMIN: + CC = ISD::CondCode::SETLT; + break; + case ISD::SMAX: + CC = ISD::CondCode::SETGT; + break; + case ISD::UMIN: + CC = ISD::CondCode::SETULT; + break; + case ISD::UMAX: + CC = ISD::CondCode::SETUGT; + break; + default: + llvm_unreachable("Unknown MINMAX opcode"); } SDValue Cond = DAG.getSetCC(DL, VT, N0, N1, CC); @@ -24409,10 +24503,10 @@ SmallVector LoOps, HiOps; for (unsigned i = 0; i != NumElts; i += 16) { for (unsigned j = 0; j != 8; ++j) { - LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, - MVT::i16)); - HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, - MVT::i16)); + LoOps.push_back( + DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, MVT::i16)); + HiOps.push_back( + DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, MVT::i16)); } } @@ -24437,7 +24531,7 @@ "Should not custom lower when pmulld is available!"); // Extract the odd parts. - static const int UnpackMask[] = { 1, -1, 3, -1 }; + static const int UnpackMask[] = {1, -1, 3, -1}; SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); @@ -24455,7 +24549,7 @@ // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. - static const int ShufMask[] = { 0, 4, 2, 6 }; + static const int ShufMask[] = {0, 4, 2, 6}; return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); } @@ -24538,14 +24632,14 @@ // // Place the odd value at an even position (basically, shift all values 1 // step to the left): - const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, + const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1}; // => - SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A, - makeArrayRef(&Mask[0], NumElts)); + SDValue Odd0 = + DAG.getVectorShuffle(VT, dl, A, A, makeArrayRef(&Mask[0], NumElts)); // => - SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B, - makeArrayRef(&Mask[0], NumElts)); + SDValue Odd1 = + DAG.getVectorShuffle(VT, dl, B, B, makeArrayRef(&Mask[0], NumElts)); // Emit two multiplies, one for the lower 2 ints and one for the higher 2 // ints. @@ -24588,7 +24682,7 @@ // Only i8 vectors should need custom lowering after this. assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || - (VT == MVT::v64i8 && Subtarget.hasBWI())) && + (VT == MVT::v64i8 && Subtarget.hasBWI())) && "Unsupported vector type"); // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply, @@ -24634,10 +24728,9 @@ Lo = DAG.getBitcast(VT, Lo); Hi = DAG.getBitcast(VT, Hi); return DAG.getVectorShuffle(VT, dl, Lo, Hi, - { 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 50, 52, 54, 56, 58, 60, 62}); + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 50, 52, 54, 56, 58, 60, 62}); } // For signed v16i8 and all unsigned vXi8 we will unpack the low and high @@ -24646,7 +24739,7 @@ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15, + static const int PSHUFDMask[] = {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}; // Extract the lo parts and zero/sign extend to i16. 
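// Illustrative sketch, not from this patch: intrinsics-level equivalent of
// the v4i32 multiply lowering above for targets without PMULLD. PMULUDQ
// multiplies the even 32-bit lanes into 64-bit products, so two PMULUDQs
// plus shuffles recover all four low 32-bit results (the DAG sequence in the
// patch uses a different but equivalent shuffle arrangement).
#include <emmintrin.h>
static __m128i mullo_epi32_sse2(__m128i A, __m128i B) {
  __m128i Even = _mm_mul_epu32(A, B);                      // lanes 0 and 2
  __m128i Odd = _mm_mul_epu32(_mm_srli_epi64(A, 32),       // lanes 1 and 3
                              _mm_srli_epi64(B, 32));
  Even = _mm_shuffle_epi32(Even, _MM_SHUFFLE(0, 0, 2, 0)); // keep low halves
  Odd = _mm_shuffle_epi32(Odd, _MM_SHUFFLE(0, 0, 2, 0));
  return _mm_unpacklo_epi32(Even, Odd);                    // lanes 0,1,2,3
}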
@@ -24669,10 +24762,10 @@ ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG); AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG); } else { - ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, - DAG.getConstant(0, dl, VT))); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, - DAG.getConstant(0, dl, VT))); + ALo = DAG.getBitcast( + ExVT, getUnpackl(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); + AHi = DAG.getBitcast( + ExVT, getUnpackh(DAG, dl, VT, A, DAG.getConstant(0, dl, VT))); } SDValue BLo, BHi; @@ -24711,10 +24804,10 @@ BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG); BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG); } else { - BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, - DAG.getConstant(0, dl, VT))); - BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, - DAG.getConstant(0, dl, VT))); + BLo = DAG.getBitcast( + ExVT, getUnpackl(DAG, dl, VT, B, DAG.getConstant(0, dl, VT))); + BHi = DAG.getBitcast( + ExVT, getUnpackh(DAG, dl, VT, B, DAG.getConstant(0, dl, VT))); } // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and @@ -24728,7 +24821,8 @@ return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } -SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget.isTargetWin64() && "Unexpected target"); EVT VT = Op.getValueType(); assert(VT.isInteger() && VT.getSizeInBits() == 128 && @@ -24737,13 +24831,32 @@ RTLIB::Libcall LC; bool isSigned; switch (Op->getOpcode()) { - default: llvm_unreachable("Unexpected request for libcall!"); - case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; - case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; - case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; - case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; - case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break; - case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break; + default: + llvm_unreachable("Unexpected request for libcall!"); + case ISD::SDIV: + isSigned = true; + LC = RTLIB::SDIV_I128; + break; + case ISD::UDIV: + isSigned = false; + LC = RTLIB::UDIV_I128; + break; + case ISD::SREM: + isSigned = true; + LC = RTLIB::SREM_I128; + break; + case ISD::UREM: + isSigned = false; + LC = RTLIB::UREM_I128; + break; + case ISD::SDIVREM: + isSigned = true; + LC = RTLIB::SDIVREM_I128; + break; + case ISD::UDIVREM: + isSigned = false; + LC = RTLIB::UDIVREM_I128; + break; } SDLoc dl(Op); @@ -24760,7 +24873,7 @@ InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MachinePointerInfo(), /* Alignment = */ 16); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Ty = PointerType::get(ArgTy,0); + Entry.Ty = PointerType::get(ArgTy, 0); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); @@ -24805,9 +24918,9 @@ // The shift amount is a variable, but it is the same for all vector lanes. // These instructions are defined together with shift-immediate. 
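// Illustrative sketch, not from this patch: the kind of source construct
// LowerWin64_i128OP above exists for. Win64 has no legal 128-bit division,
// so the operation becomes a libcall (e.g. RTLIB::SDIV_I128) with both i128
// operands spilled to 16-byte-aligned stack slots and passed by pointer, as
// the surrounding code shows.
static __int128 sdiv128(__int128 A, __int128 B) {
  return A / B; // routed through LowerWin64_i128OP when targeting Win64
}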
-static -bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, - unsigned Opcode) { +static bool SupportedVectorShiftWithBaseAmnt(MVT VT, + const X86Subtarget &Subtarget, + unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } @@ -24827,7 +24940,7 @@ return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); - bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; + bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; return (Opcode == ISD::SRA) ? AShift : LShift; } @@ -25094,8 +25207,8 @@ return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo), - DAG.getBitcast(VT, Hi), - {0, 2, 4, 6, 8, 10, 12, 14}); + DAG.getBitcast(VT, Hi), + {0, 2, 4, 6, 8, 10, 12, 14}); } return SDValue(); @@ -25874,7 +25987,7 @@ // If this is a canonical idempotent atomicrmw w/no uses, we have a better // lowering available in lowerAtomicArith. - // TODO: push more cases through this path. + // TODO: push more cases through this path. if (auto *C = dyn_cast(AI->getValOperand())) if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && AI->use_empty()) @@ -25885,7 +25998,8 @@ auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. - auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); + auto Order = + AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); // Before the load we need a fence. Here is an example lifted from // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence @@ -25932,31 +26046,31 @@ /// Emit a locked operation on a stack location which does not change any /// memory location, but does involve a lock prefix. Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, -/// and b) definitely dereferenceable. Returns the new Chain result. +/// and b) definitely dereferenceable. Returns the new Chain result. static SDValue emitLockedStackOp(SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue Chain, SDLoc DL) { + const X86Subtarget &Subtarget, SDValue Chain, + SDLoc DL) { // Implementation notes: // 1) LOCK prefix creates a full read/write reordering barrier for memory // operations issued by the current processor. As such, the location // referenced is not relevant for the ordering properties of the instruction. // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual, - // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions + // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 2) Using an immediate operand appears to be the best encoding choice // here since it doesn't require an extra register. // 3) OR appears to be very slightly faster than ADD. (Though, the difference // is small enough it might just be measurement noise.) // 4) When choosing offsets, there are several contributing factors: // a) If there's no redzone, we default to TOS. (We could allocate a cache - // line aligned stack object to improve this case.) + // line aligned stack object to improve this case.) // b) To minimize our chances of introducing a false dependence, we prefer - // to offset the stack usage from TOS slightly. + // to offset the stack usage from TOS slightly. 
// c) To minimize concerns about cross thread stack usage - in particular, // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // captures state in the TOS frame and accesses it from many threads - // we want to use an offset such that the offset is in a distinct cache // line from the TOS frame. - // + // // For a general discussion of the tradeoffs and benchmark results, see: // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ @@ -25966,31 +26080,28 @@ if (Subtarget.is64Bit()) { SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::RSP, MVT::i64), // Base - DAG.getTargetConstant(1, DL, MVT::i8), // Scale - DAG.getRegister(0, MVT::i64), // Index - DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp - DAG.getRegister(0, MVT::i16), // Segment. - Zero, - Chain}; - SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, - MVT::Other, Ops); + SDValue Ops[] = {DAG.getRegister(X86::RSP, MVT::i64), // Base + DAG.getTargetConstant(1, DL, MVT::i8), // Scale + DAG.getRegister(0, MVT::i64), // Index + DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp + DAG.getRegister(0, MVT::i16), // Segment. + Zero, + Chain}; + SDNode *Res = + DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops); return SDValue(Res, 1); } SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, DL, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp - DAG.getRegister(0, MVT::i16), // Segment. - Zero, - Chain - }; - SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, - MVT::Other, Ops); + SDValue Ops[] = {DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, DL, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp + DAG.getRegister(0, MVT::i16), // Segment. + Zero, + Chain}; + SDNode *Res = + DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops); return SDValue(Res, 1); } @@ -25998,9 +26109,9 @@ SelectionDAG &DAG) { SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( - cast(Op.getOperand(1))->getZExtValue()); + cast(Op.getOperand(1))->getZExtValue()); SyncScope::ID FenceSSID = static_cast( - cast(Op.getOperand(2))->getZExtValue()); + cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
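// Illustrative sketch, not from this patch: what emitLockedStackOp above
// provides at the source level. A sequentially consistent fence can be
// implemented either as MFENCE or as a LOCKed no-op OR of a stack location
// at (or slightly below) the stack pointer; both order all earlier loads and
// stores, and the locked stack op is typically cheaper.
#include <atomic>
static void full_fence() {
  // With this lowering in place, this may compile to the locked stack-op
  // idiom rather than MFENCE.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}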
@@ -26009,7 +26120,7 @@ if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); return emitLockedStackOp(DAG, Subtarget, Chain, dl); } @@ -26023,36 +26134,44 @@ SDLoc DL(Op); unsigned Reg = 0; unsigned size = 0; - switch(T.SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: Reg = X86::AL; size = 1; break; - case MVT::i16: Reg = X86::AX; size = 2; break; - case MVT::i32: Reg = X86::EAX; size = 4; break; + switch (T.SimpleTy) { + default: + llvm_unreachable("Invalid value type!"); + case MVT::i8: + Reg = X86::AL; + size = 1; + break; + case MVT::i16: + Reg = X86::AX; + size = 2; + break; + case MVT::i32: + Reg = X86::EAX; + size = 4; + break; case MVT::i64: assert(Subtarget.is64Bit() && "Node not type legal!"); - Reg = X86::RAX; size = 8; + Reg = X86::RAX; + size = 8; break; } - SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, - Op.getOperand(2), SDValue()); - SDValue Ops[] = { cpIn.getValue(0), - Op.getOperand(1), - Op.getOperand(3), - DAG.getTargetConstant(size, DL, MVT::i8), - cpIn.getValue(1) }; + SDValue cpIn = + DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue()); + SDValue Ops[] = {cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), + DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1)}; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(Op)->getMemOperand(); - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, T, MMO); + SDValue Result = + DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO); SDValue cpOut = - DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); + DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, MVT::i32, cpOut.getValue(2)); SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG); - return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - cpOut, Success, EFLAGS.getValue(1)); + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), cpOut, Success, + EFLAGS.getValue(1)); } // Create MOVMSKB, taking into account whether we need to split for AVX1. @@ -26096,7 +26215,7 @@ // Custom splitting for BWI types when AVX512F is available but BWI isn't. if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) { + DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) { SDLoc dl(Op); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); @@ -26131,8 +26250,8 @@ // Example: from MVT::v2i32 to MVT::v4i32. MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), SrcVT.getVectorNumElements() * 2); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, - DAG.getUNDEF(SrcVT)); + Src = + DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, DAG.getUNDEF(SrcVT)); } else { assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && "Unexpected source type in LowerBITCAST"); @@ -26149,15 +26268,15 @@ DAG.getIntPtrConstant(0, dl)); } - assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && - Subtarget.hasMMX() && "Unexpected custom BITCAST"); + assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && Subtarget.hasMMX() && + "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || - (DstVT.isVector() && DstVT.getSizeInBits()==64)) && + (DstVT.isVector() && DstVT.getSizeInBits() == 64)) && "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. 
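// Illustrative sketch, not from this patch: source-level counterpart of
// LowerCMP_SWAP above. The expected value is pinned to AL/AX/EAX/RAX for the
// LCMPXCHG node, and the success flag is recovered from EFLAGS via a SETcc
// on X86::COND_E.
#include <atomic>
static bool cas32(std::atomic<int> &Obj, int &Expected, int Desired) {
  // Compiles down to CMPXCHG with Expected in EAX and SETE for the result.
  return Obj.compare_exchange_strong(Expected, Desired);
}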
- if (SrcVT==MVT::i64 && DstVT.isVector()) + if (SrcVT == MVT::i64 && DstVT.isVector()) return Op; - if (DstVT==MVT::i64 && SrcVT.isVector()) + if (DstVT == MVT::i64 && SrcVT.isVector()) return Op; // MMX <=> MMX conversions are Legal. if (SrcVT.isVector() && DstVT.isVector()) @@ -26295,7 +26414,8 @@ if (Subtarget.hasVPOPCNTDQ()) { unsigned NumElems = VT.getVectorNumElements(); assert((VT.getVectorElementType() == MVT::i8 || - VT.getVectorElementType() == MVT::i16) && "Unexpected type"); + VT.getVectorElementType() == MVT::i16) && + "Unexpected type"); if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) { MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0); @@ -26480,8 +26600,8 @@ // select LXADD if LOCK_SUB can't be selected. if (Opc == ISD::ATOMIC_LOAD_SUB) { RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS); - return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, - RHS, AN->getMemOperand()); + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS, + AN->getMemOperand()); } assert(Opc == ISD::ATOMIC_LOAD_ADD && "Used AtomicRMW ops other than Add should have been expanded!"); @@ -26498,12 +26618,12 @@ // seq_cst which isn't SingleThread, everything just needs to be preserved // during codegen and then dropped. Note that we expect (but don't assume), // that orderings other than seq_cst and acq_rel have been canonicalized to - // a store or load. + // a store or load. if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && AN->getSyncScopeID() == SyncScope::System) { // Prefer a locked operation against a stack location to minimize cache // traffic. This assumes that stack locations are very likely to be - // accessed only by the owning thread. + // accessed only by the owning thread. SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. @@ -26514,16 +26634,16 @@ SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), - DAG.getUNDEF(VT), NewChain); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), + NewChain); } SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget); // RAUW the chain, but don't worry about the result, as it's unused. assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), - DAG.getUNDEF(VT), LockOp.getValue(1)); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), + LockOp.getValue(1)); } static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, @@ -26552,10 +26672,10 @@ SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getOperand(2)); SDVTList Tys = DAG.getVTList(MVT::Other); - SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() }; - SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, - Ops, MVT::i64, - Node->getMemOperand()); + SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()}; + SDValue Chain = + DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, + MVT::i64, Node->getMemOperand()); // If this is a sequentially consistent store, also emit an appropriate // barrier. 
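// Illustrative sketch, not from this patch: the source pattern behind the
// "idempotent atomicrmw with no uses" fast path handled above. A dead
// "or 0" RMW only matters for its ordering, so at seq_cst/system scope it can
// be served by the locked stack op (or by a compiler-only MEMBARRIER in the
// weaker cases) instead of a real LOCK OR on V's cache line.
#include <atomic>
static void ordering_only_rmw(std::atomic<int> &V) {
  (void)V.fetch_or(0, std::memory_order_seq_cst); // result intentionally unused
}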
@@ -26569,11 +26689,9 @@ // Convert seq_cst store -> xchg // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - Node->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Node->getOperand(2), - Node->getMemOperand()); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(), + Node->getOperand(0), Node->getOperand(1), + Node->getOperand(2), Node->getMemOperand()); return Swap.getValue(1); } @@ -26592,12 +26710,12 @@ SDValue Carry = Op.getOperand(2); EVT CarryVT = Carry.getValueType(); APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), - Carry, DAG.getConstant(NegOne, DL, CarryVT)); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry, + DAG.getConstant(NegOne, DL, CarryVT)); unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB; - SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), - Op.getOperand(1), Carry.getValue(1)); + SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), Op.getOperand(1), + Carry.getValue(1)); SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG); if (N->getValueType(1) == MVT::i1) @@ -26681,8 +26799,7 @@ "Unexpected request for vector widening"); SDLoc dl(InOp); - if (InOp.getOpcode() == ISD::CONCAT_VECTORS && - InOp.getNumOperands() == 2) { + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) { SDValue N1 = InOp.getOperand(1); if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || N1.isUndef()) { @@ -26699,16 +26816,16 @@ EVT EltVT = InOp.getOperand(0).getValueType(); - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : - DAG.getUNDEF(EltVT); + SDValue FillVal = + FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) Ops.push_back(FillVal); return DAG.getBuildVector(NVT, dl, Ops); } - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : - DAG.getUNDEF(NVT); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, - InOp, DAG.getIntPtrConstant(0, dl)); + SDValue FillVal = + FillWithZeroes ? DAG.getConstant(0, dl, NVT) : DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, InOp, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, @@ -26780,8 +26897,8 @@ if (!Subtarget.hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // Determine how much we need to widen by to get a 512-bit type. 
- unsigned Factor = std::min(512/VT.getSizeInBits(), - 512/IndexVT.getSizeInBits()); + unsigned Factor = + std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits()); unsigned NumElts = VT.getVectorNumElements() * Factor; VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); @@ -26818,10 +26935,10 @@ assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); - assert((ScalarVT.getSizeInBits() >= 32 || - (Subtarget.hasBWI() && - (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && - "Unsupported masked load op."); + assert( + (ScalarVT.getSizeInBits() >= 32 || + (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && + "Unsupported masked load op."); // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit @@ -26836,15 +26953,14 @@ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), - N->getBasePtr(), Mask, PassThru, - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); - - SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - NewLoad.getValue(0), - DAG.getIntPtrConstant(0, dl)); + SDValue NewLoad = + DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), + N->getExtensionType(), N->isExpandingLoad()); + + SDValue Exract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), + DAG.getIntPtrConstant(0, dl)); SDValue RetOps[] = {Exract, NewLoad.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } @@ -26867,14 +26983,14 @@ assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); - assert((ScalarVT.getSizeInBits() >= 32 || - (Subtarget.hasBWI() && - (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && - "Unsupported masked store op."); + assert( + (ScalarVT.getSizeInBits() >= 32 || + (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && + "Unsupported masked store op."); // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit - unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. @@ -26916,8 +27032,8 @@ if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && !IndexVT.is512BitVector()) { // Determine how much we need to widen by to get a 512-bit type. 
- unsigned Factor = std::min(512/VT.getSizeInBits(), - 512/IndexVT.getSizeInBits()); + unsigned Factor = + std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits()); unsigned NumElts = VT.getVectorNumElements() * Factor; @@ -26930,13 +27046,13 @@ Mask = ExtendToType(Mask, MaskVT, DAG, true); } - SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, - N->getScale() }; + SDValue Ops[] = {N->getChain(), PassThru, Mask, + N->getBasePtr(), Index, N->getScale()}; SDValue NewGather = DAG.getTargetMemSDNode( DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, - NewGather, DAG.getIntPtrConstant(0, dl)); + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, NewGather, + DAG.getIntPtrConstant(0, dl)); return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl); } @@ -26985,123 +27101,199 @@ /// Provide custom lowering hooks for some operations. SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: llvm_unreachable("Should not custom lower this!"); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); + default: + llvm_unreachable("Should not custom lower this!"); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return LowerCMP_SWAP(Op, Subtarget, DAG); - case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG); + case ISD::CTPOP: + return LowerCTPOP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); - case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); - case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); - case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); - case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG); - case ISD::VSELECT: return LowerVSELECT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); - case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); - case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ATOMIC_LOAD_AND: + return lowerAtomicArith(Op, DAG, Subtarget); + case ISD::ATOMIC_STORE: + return LowerATOMIC_STORE(Op, DAG, Subtarget); + case ISD::BITREVERSE: + return LowerBITREVERSE(Op, Subtarget, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, Subtarget, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVectorShuffle(Op, Subtarget, DAG); + case ISD::VSELECT: + return LowerVSELECT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_SUBVECTOR: + return 
LowerINSERT_SUBVECTOR(Op, Subtarget, DAG); + case ISD::EXTRACT_SUBVECTOR: + return LowerEXTRACT_SUBVECTOR(Op, Subtarget, DAG); + case ISD::SCALAR_TO_VECTOR: + return LowerSCALAR_TO_VECTOR(Op, Subtarget, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); case ISD::SHL_PARTS: case ISD::SRA_PARTS: - case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); + case ISD::SRL_PARTS: + return LowerShiftParts(Op, DAG); case ISD::FSHL: - case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); - case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); - case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG); - case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG); + case ISD::FSHR: + return LowerFunnelShift(Op, Subtarget, DAG); + case ISD::SINT_TO_FP: + return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + return LowerUINT_TO_FP(Op, DAG); + case ISD::TRUNCATE: + return LowerTRUNCATE(Op, DAG); + case ISD::ZERO_EXTEND: + return LowerZERO_EXTEND(Op, Subtarget, DAG); + case ISD::SIGN_EXTEND: + return LowerSIGN_EXTEND(Op, Subtarget, DAG); + case ISD::ANY_EXTEND: + return LowerANY_EXTEND(Op, Subtarget, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG); case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); - case ISD::STORE: return LowerStore(Op, Subtarget, DAG); + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG); + case ISD::FP_EXTEND: + return LowerFP_EXTEND(Op, DAG); + case ISD::LOAD: + return LowerLoad(Op, Subtarget, DAG); + case ISD::STORE: + return LowerStore(Op, Subtarget, DAG); case ISD::FADD: - case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget); + case ISD::FSUB: + return lowerFaddFsub(Op, DAG, Subtarget); case ISD::FABS: - case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); - case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::FNEG: + return LowerFABSorFNEG(Op, DAG); + case ISD::FCOPYSIGN: + return LowerFCOPYSIGN(Op, DAG); + case ISD::FGETSIGN: + return LowerFGETSIGN(Op, DAG); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::SETCCCARRY: + return LowerSETCCCARRY(Op, DAG); + case ISD::SELECT: + return LowerSELECT(Op, DAG); + case ISD::BRCOND: + return LowerBRCOND(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case 
ISD::VACOPY: + return LowerVACOPY(Op, Subtarget, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG); + case ISD::ADDROFRETURNADDR: + return LowerADDROFRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: - return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); - case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_RETURN: + return LowerEH_RETURN(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); - case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); - case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); - case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::INIT_TRAMPOLINE: + return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: + return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::FLT_ROUNDS_: + return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG); + case ISD::CTLZ_ZERO_UNDEF: + return LowerCTLZ(Op, Subtarget, DAG); case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG); - case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); + case ISD::CTTZ_ZERO_UNDEF: + return LowerCTTZ(Op, Subtarget, DAG); + case ISD::MUL: + return LowerMUL(Op, Subtarget, DAG); case ISD::MULHS: - case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG); + case ISD::MULHU: + return LowerMULH(Op, Subtarget, DAG); case ISD::ROTL: - case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG); + case ISD::ROTR: + return LowerRotate(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: - case ISD::SHL: return LowerShift(Op, Subtarget, DAG); + case ISD::SHL: + return LowerShift(Op, Subtarget, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: case ISD::USUBO: case ISD::SMULO: - case ISD::UMULO: return LowerXALUO(Op, DAG); - case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); - case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); + case ISD::UMULO: + return LowerXALUO(Op, DAG); + case ISD::READCYCLECOUNTER: + return LowerREADCYCLECOUNTER(Op, Subtarget, DAG); + case ISD::BITCAST: + return LowerBITCAST(Op, Subtarget, DAG); case ISD::ADDCARRY: - case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); + case ISD::SUBCARRY: + return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: - case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget); + case ISD::SUB: + return lowerAddSub(Op, DAG, Subtarget); case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: - case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget); + case ISD::SSUBSAT: + return LowerADDSAT_SUBSAT(Op, 
DAG, Subtarget); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: - case ISD::UMIN: return LowerMINMAX(Op, DAG); - case ISD::ABS: return LowerABS(Op, Subtarget, DAG); - case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); - case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); - case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); - case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); - case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); + case ISD::UMIN: + return LowerMINMAX(Op, DAG); + case ISD::ABS: + return LowerABS(Op, Subtarget, DAG); + case ISD::FSINCOS: + return LowerFSINCOS(Op, Subtarget, DAG); + case ISD::MLOAD: + return LowerMLOAD(Op, Subtarget, DAG); + case ISD::MSTORE: + return LowerMSTORE(Op, Subtarget, DAG); + case ISD::MGATHER: + return LowerMGATHER(Op, Subtarget, DAG); + case ISD::MSCATTER: + return LowerMSCATTER(Op, Subtarget, DAG); case ISD::GC_TRANSITION_START: - return LowerGC_TRANSITION_START(Op, DAG); - case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG); + return LowerGC_TRANSITION_START(Op, DAG); + case ISD::GC_TRANSITION_END: + return LowerGC_TRANSITION_END(Op, DAG); } } @@ -27127,7 +27319,7 @@ // If the original node has multiple results, then the return node should // have the same number of results. assert((N->getNumValues() == Res->getNumValues()) && - "Lowering returned the wrong number of results!"); + "Lowering returned the wrong number of results!"); // Places new result values base on N result number. for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) @@ -27137,7 +27329,7 @@ /// Replace a node with an illegal result type with a new node built out of /// custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl&Results, + SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { @@ -27169,10 +27361,10 @@ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger && VT.getVectorNumElements() == 2) { // Promote to a pattern that will be turned into PMULUDQ. 
- SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(0)); - SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(1)); + SDValue N0 = + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, N->getOperand(0)); + SDValue N1 = + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, N->getOperand(1)); SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul)); } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && @@ -27208,11 +27400,10 @@ "Expected a VT that divides into 128 bits."); unsigned NumConcat = 128 / InVT.getSizeInBits(); - EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - NumConcat * InVT.getVectorNumElements()); - EVT WideVT = EVT::getVectorVT(*DAG.getContext(), - VT.getVectorElementType(), + EVT InWideVT = + EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + NumConcat * InVT.getVectorNumElements()); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumConcat * VT.getVectorNumElements()); SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); @@ -27266,8 +27457,8 @@ N->getOperand(0), UNDEF); SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(1), UNDEF); - SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, - N->getOperand(2)); + SDValue Res = + DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, N->getOperand(2)); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); @@ -27326,7 +27517,7 @@ } case ISD::SDIVREM: case ISD::UDIVREM: { - SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); + SDValue V = LowerWin64_i128OP(SDValue(N, 0), DAG); Results.push_back(V); return; } @@ -27354,7 +27545,7 @@ // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = VT.getVectorNumElements(); - for (unsigned i=0; i < MinElts; ++i) { + for (unsigned i = 0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In, DAG.getIntPtrConstant(i, dl)); Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val); @@ -27394,13 +27585,12 @@ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting // we allow the sra from the extend to i32 to be shared by the split. - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - InVT.getVectorNumElements() / 2); - MVT ExtendVT = MVT::getVectorVT(MVT::i32, - VT.getVectorNumElements()); - In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, - In, DAG.getIntPtrConstant(0, dl)); + EVT ExtractVT = + EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + InVT.getVectorNumElements() / 2); + MVT ExtendVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements()); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, In, + DAG.getIntPtrConstant(0, dl)); In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In); // Fill a vector with sign bits for each element. @@ -27444,11 +27634,11 @@ // Create an unpackl and unpackh to interleave the sign bits then bitcast // to v2i64. 
- SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, - {0, 4, 1, 5}); + SDValue Lo = + DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {0, 4, 1, 5}); Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); - SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, - {2, 6, 3, 7}); + SDValue Hi = + DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {2, 6, 3, 7}); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); @@ -27512,10 +27702,9 @@ return; SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src); - Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext - : ISD::AssertSext, - dl, MVT::v2i32, Res, - DAG.getValueType(VT.getVectorElementType())); + Res = DAG.getNode( + N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext : ISD::AssertSext, + dl, MVT::v2i32, Res, DAG.getValueType(VT.getVectorElementType())); Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); Results.push_back(Res); return; @@ -27553,13 +27742,12 @@ return; } - if (VT == MVT::v2i32) { assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); bool Widenv2i32 = - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; + getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; if (Src.getValueType() == MVT::v2f64) { unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (!IsSigned && !Subtarget.hasVLX()) { @@ -27569,8 +27757,8 @@ // Custom widen by doubling to a legal vector with. Isel will // further widen to v8f64. Opc = ISD::FP_TO_UINT; - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, - Src, DAG.getUNDEF(MVT::v2f64)); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src, + DAG.getUNDEF(MVT::v2f64)); } SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); if (!Widenv2i32) @@ -27584,8 +27772,8 @@ SDValue Idx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); - Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT - : ISD::FP_TO_UINT, dl, MVT::v4i32, Res); + Res = DAG.getNode(IsSigned ? 
ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, + MVT::v4i32, Res); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; @@ -27607,9 +27795,9 @@ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts); SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, - DAG.getConstantFP(0.0, dl, VecInVT), Src, - ZeroIdx); + SDValue Res = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, + DAG.getConstantFP(0.0, dl, VecInVT), Src, ZeroIdx); Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); Results.push_back(Res); @@ -27654,7 +27842,7 @@ } case ISD::FP_ROUND: { if (!isTypeLegal(N->getOperand(0).getValueType())) - return; + return; SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); return; @@ -27669,8 +27857,9 @@ case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default : llvm_unreachable("Do not know how to custom type " - "legalize this intrinsic operation!"); + default: + llvm_unreachable("Do not know how to custom type " + "legalize this intrinsic operation!"); case Intrinsic::x86_rdtsc: return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results); @@ -27703,11 +27892,10 @@ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), DAG.getConstant(1, dl, HalfT)); cpInL = DAG.getCopyToReg(N->getOperand(0), dl, - Regs64bit ? X86::RAX : X86::EAX, - cpInL, SDValue()); - cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, - Regs64bit ? X86::RDX : X86::EDX, - cpInH, cpInL.getValue(1)); + Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue()); + cpInH = + DAG.getCopyToReg(cpInL.getValue(0), dl, Regs64bit ? X86::RDX : X86::EDX, + cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), DAG.getConstant(0, dl, HalfT)); @@ -27757,12 +27945,12 @@ Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); } SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, - Regs64bit ? X86::RAX : X86::EAX, - HalfT, Result.getValue(1)); + Regs64bit ? X86::RAX : X86::EAX, HalfT, + Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, - Regs64bit ? X86::RDX : X86::EDX, - HalfT, cpOutL.getValue(2)); - SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; + Regs64bit ? X86::RDX : X86::EDX, HalfT, + cpOutL.getValue(2)); + SDValue OpsF[] = {cpOutL.getValue(0), cpOutH.getValue(0)}; SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, MVT::i32, cpOutH.getValue(2)); @@ -27785,7 +27973,7 @@ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the // lower 64-bits. SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); - SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; + SDValue Ops[] = {Node->getChain(), Node->getBasePtr()}; SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, @@ -27799,10 +27987,9 @@ // integer into the significand. // FIXME: Do we need to glue? See FIXME comment in BuildFILD. 
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); - SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, - dl, Tys, Ops, MVT::i64, - Node->getMemOperand()); + SDValue Ops[] = {Node->getChain(), Node->getBasePtr()}; + SDValue Result = DAG.getMemIntrinsicNode( + X86ISD::FILD_FLAG, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Chain = Result.getValue(1); SDValue InFlag = Result.getValue(2); @@ -27814,11 +28001,10 @@ int SPFI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; - Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, - DAG.getVTList(MVT::Other), StoreOps, - MVT::i64, MPI, 0 /*Align*/, - MachineMemOperand::MOStore); + SDValue StoreOps[] = {Chain, Result, StackPtr, InFlag}; + Chain = DAG.getMemIntrinsicNode( + X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, + MPI, 0 /*Align*/, MachineMemOperand::MOStore); // Finally load the value back from the stack temporary and return it. // This load is not atomic and doesn't need to be. @@ -27868,8 +28054,8 @@ } // Custom splitting for BWI types when AVX512F is available but BWI isn't. - if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && - SrcVT.isVector() && isTypeLegal(SrcVT)) { + if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && SrcVT.isVector() && + isTypeLegal(SrcVT)) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8; @@ -27905,9 +28091,9 @@ return; SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2f32)); + SDValue PassThru = + DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + Gather->getPassThru(), DAG.getUNDEF(MVT::v2f32)); if (!Subtarget.hasVLX()) { // We need to widen the mask, but the instruction will only use 2 // of its elements. So we can use undef. @@ -27915,11 +28101,11 @@ DAG.getUNDEF(MVT::v2i1)); Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); + DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, + Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); return; @@ -27929,9 +28115,9 @@ SDValue Index = Gather->getIndex(); SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2i32)); + SDValue PassThru = + DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, + Gather->getPassThru(), DAG.getUNDEF(MVT::v2i32)); // If the index is v2i64 we can use it directly. 
if (Index.getValueType() == MVT::v2i64 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { @@ -27942,11 +28128,11 @@ DAG.getUNDEF(MVT::v2i1)); Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); + DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, + Gather->getMemoryVT(), Gather->getMemOperand()); SDValue Chain = Res.getValue(2); if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, @@ -27957,15 +28143,15 @@ } if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { EVT IndexVT = Index.getValueType(); - EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(), - IndexVT.getScalarType(), 4); + EVT NewIndexVT = + EVT::getVectorVT(*DAG.getContext(), IndexVT.getScalarType(), 4); // Otherwise we need to custom widen everything to avoid promotion. Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, DAG.getUNDEF(IndexVT)); Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; + SDValue Ops[] = {Gather->getChain(), PassThru, Mask, + Gather->getBasePtr(), Index, Gather->getScale()}; SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other), Gather->getMemoryVT(), dl, Ops, Gather->getMemOperand()); @@ -27993,8 +28179,7 @@ auto *Ld = cast(N); MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? 
MVT::i64 : MVT::f64; SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), - Ld->getAlignment(), + Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); SDValue Chain = Res.getValue(1); MVT WideVT = MVT::getVectorVT(LdVT, 2); @@ -28011,348 +28196,684 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((X86ISD::NodeType)Opcode) { - case X86ISD::FIRST_NUMBER: break; - case X86ISD::BSF: return "X86ISD::BSF"; - case X86ISD::BSR: return "X86ISD::BSR"; - case X86ISD::SHLD: return "X86ISD::SHLD"; - case X86ISD::SHRD: return "X86ISD::SHRD"; - case X86ISD::FAND: return "X86ISD::FAND"; - case X86ISD::FANDN: return "X86ISD::FANDN"; - case X86ISD::FOR: return "X86ISD::FOR"; - case X86ISD::FXOR: return "X86ISD::FXOR"; - case X86ISD::FILD: return "X86ISD::FILD"; - case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; - case X86ISD::FIST: return "X86ISD::FIST"; - case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; - case X86ISD::FLD: return "X86ISD::FLD"; - case X86ISD::FST: return "X86ISD::FST"; - case X86ISD::CALL: return "X86ISD::CALL"; - case X86ISD::BT: return "X86ISD::BT"; - case X86ISD::CMP: return "X86ISD::CMP"; - case X86ISD::COMI: return "X86ISD::COMI"; - case X86ISD::UCOMI: return "X86ISD::UCOMI"; - case X86ISD::CMPM: return "X86ISD::CMPM"; - case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE"; - case X86ISD::SETCC: return "X86ISD::SETCC"; - case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; - case X86ISD::FSETCC: return "X86ISD::FSETCC"; - case X86ISD::FSETCCM: return "X86ISD::FSETCCM"; - case X86ISD::FSETCCM_SAE: return "X86ISD::FSETCCM_SAE"; - case X86ISD::CMOV: return "X86ISD::CMOV"; - case X86ISD::BRCOND: return "X86ISD::BRCOND"; - case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; - case X86ISD::IRET: return "X86ISD::IRET"; - case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; - case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; - case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; - case X86ISD::Wrapper: return "X86ISD::Wrapper"; - case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; - case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; - case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; - case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; - case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; - case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; - case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; - case X86ISD::PINSRB: return "X86ISD::PINSRB"; - case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; - case X86ISD::ANDNP: return "X86ISD::ANDNP"; - case X86ISD::BLENDI: return "X86ISD::BLENDI"; - case X86ISD::BLENDV: return "X86ISD::BLENDV"; - case X86ISD::HADD: return "X86ISD::HADD"; - case X86ISD::HSUB: return "X86ISD::HSUB"; - case X86ISD::FHADD: return "X86ISD::FHADD"; - case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; - case X86ISD::FMAX: return "X86ISD::FMAX"; - case X86ISD::FMAXS: return "X86ISD::FMAXS"; - case X86ISD::FMAX_SAE: return "X86ISD::FMAX_SAE"; - case X86ISD::FMAXS_SAE: return "X86ISD::FMAXS_SAE"; - case X86ISD::FMIN: return "X86ISD::FMIN"; - case X86ISD::FMINS: return "X86ISD::FMINS"; - case X86ISD::FMIN_SAE: return "X86ISD::FMIN_SAE"; - case X86ISD::FMINS_SAE: return "X86ISD::FMINS_SAE"; - case X86ISD::FMAXC: return "X86ISD::FMAXC"; - case X86ISD::FMINC: return "X86ISD::FMINC"; - case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; - case X86ISD::FRCP: return "X86ISD::FRCP"; - case 
X86ISD::EXTRQI: return "X86ISD::EXTRQI"; - case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; - case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; - case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; - case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; - case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; - case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP"; + case X86ISD::FIRST_NUMBER: + break; + case X86ISD::BSF: + return "X86ISD::BSF"; + case X86ISD::BSR: + return "X86ISD::BSR"; + case X86ISD::SHLD: + return "X86ISD::SHLD"; + case X86ISD::SHRD: + return "X86ISD::SHRD"; + case X86ISD::FAND: + return "X86ISD::FAND"; + case X86ISD::FANDN: + return "X86ISD::FANDN"; + case X86ISD::FOR: + return "X86ISD::FOR"; + case X86ISD::FXOR: + return "X86ISD::FXOR"; + case X86ISD::FILD: + return "X86ISD::FILD"; + case X86ISD::FILD_FLAG: + return "X86ISD::FILD_FLAG"; + case X86ISD::FIST: + return "X86ISD::FIST"; + case X86ISD::FP_TO_INT_IN_MEM: + return "X86ISD::FP_TO_INT_IN_MEM"; + case X86ISD::FLD: + return "X86ISD::FLD"; + case X86ISD::FST: + return "X86ISD::FST"; + case X86ISD::CALL: + return "X86ISD::CALL"; + case X86ISD::BT: + return "X86ISD::BT"; + case X86ISD::CMP: + return "X86ISD::CMP"; + case X86ISD::COMI: + return "X86ISD::COMI"; + case X86ISD::UCOMI: + return "X86ISD::UCOMI"; + case X86ISD::CMPM: + return "X86ISD::CMPM"; + case X86ISD::CMPM_SAE: + return "X86ISD::CMPM_SAE"; + case X86ISD::SETCC: + return "X86ISD::SETCC"; + case X86ISD::SETCC_CARRY: + return "X86ISD::SETCC_CARRY"; + case X86ISD::FSETCC: + return "X86ISD::FSETCC"; + case X86ISD::FSETCCM: + return "X86ISD::FSETCCM"; + case X86ISD::FSETCCM_SAE: + return "X86ISD::FSETCCM_SAE"; + case X86ISD::CMOV: + return "X86ISD::CMOV"; + case X86ISD::BRCOND: + return "X86ISD::BRCOND"; + case X86ISD::RET_FLAG: + return "X86ISD::RET_FLAG"; + case X86ISD::IRET: + return "X86ISD::IRET"; + case X86ISD::REP_STOS: + return "X86ISD::REP_STOS"; + case X86ISD::REP_MOVS: + return "X86ISD::REP_MOVS"; + case X86ISD::GlobalBaseReg: + return "X86ISD::GlobalBaseReg"; + case X86ISD::Wrapper: + return "X86ISD::Wrapper"; + case X86ISD::WrapperRIP: + return "X86ISD::WrapperRIP"; + case X86ISD::MOVDQ2Q: + return "X86ISD::MOVDQ2Q"; + case X86ISD::MMX_MOVD2W: + return "X86ISD::MMX_MOVD2W"; + case X86ISD::MMX_MOVW2D: + return "X86ISD::MMX_MOVW2D"; + case X86ISD::PEXTRB: + return "X86ISD::PEXTRB"; + case X86ISD::PEXTRW: + return "X86ISD::PEXTRW"; + case X86ISD::INSERTPS: + return "X86ISD::INSERTPS"; + case X86ISD::PINSRB: + return "X86ISD::PINSRB"; + case X86ISD::PINSRW: + return "X86ISD::PINSRW"; + case X86ISD::PSHUFB: + return "X86ISD::PSHUFB"; + case X86ISD::ANDNP: + return "X86ISD::ANDNP"; + case X86ISD::BLENDI: + return "X86ISD::BLENDI"; + case X86ISD::BLENDV: + return "X86ISD::BLENDV"; + case X86ISD::HADD: + return "X86ISD::HADD"; + case X86ISD::HSUB: + return "X86ISD::HSUB"; + case X86ISD::FHADD: + return "X86ISD::FHADD"; + case X86ISD::FHSUB: + return "X86ISD::FHSUB"; + case X86ISD::CONFLICT: + return "X86ISD::CONFLICT"; + case X86ISD::FMAX: + return "X86ISD::FMAX"; + case X86ISD::FMAXS: + return "X86ISD::FMAXS"; + case X86ISD::FMAX_SAE: + return "X86ISD::FMAX_SAE"; + case X86ISD::FMAXS_SAE: + return "X86ISD::FMAXS_SAE"; + case X86ISD::FMIN: + return "X86ISD::FMIN"; + case X86ISD::FMINS: + return "X86ISD::FMINS"; + case X86ISD::FMIN_SAE: + return "X86ISD::FMIN_SAE"; + case X86ISD::FMINS_SAE: + return "X86ISD::FMINS_SAE"; + case X86ISD::FMAXC: + return "X86ISD::FMAXC"; + case X86ISD::FMINC: + return "X86ISD::FMINC"; + case X86ISD::FRSQRT: + 
return "X86ISD::FRSQRT"; + case X86ISD::FRCP: + return "X86ISD::FRCP"; + case X86ISD::EXTRQI: + return "X86ISD::EXTRQI"; + case X86ISD::INSERTQI: + return "X86ISD::INSERTQI"; + case X86ISD::TLSADDR: + return "X86ISD::TLSADDR"; + case X86ISD::TLSBASEADDR: + return "X86ISD::TLSBASEADDR"; + case X86ISD::TLSCALL: + return "X86ISD::TLSCALL"; + case X86ISD::EH_SJLJ_SETJMP: + return "X86ISD::EH_SJLJ_SETJMP"; + case X86ISD::EH_SJLJ_LONGJMP: + return "X86ISD::EH_SJLJ_LONGJMP"; case X86ISD::EH_SJLJ_SETUP_DISPATCH: return "X86ISD::EH_SJLJ_SETUP_DISPATCH"; - case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; - case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; - case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; - case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; - case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; - case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; - case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG"; + case X86ISD::EH_RETURN: + return "X86ISD::EH_RETURN"; + case X86ISD::TC_RETURN: + return "X86ISD::TC_RETURN"; + case X86ISD::FNSTCW16m: + return "X86ISD::FNSTCW16m"; + case X86ISD::FNSTSW16r: + return "X86ISD::FNSTSW16r"; + case X86ISD::LCMPXCHG_DAG: + return "X86ISD::LCMPXCHG_DAG"; + case X86ISD::LCMPXCHG8_DAG: + return "X86ISD::LCMPXCHG8_DAG"; + case X86ISD::LCMPXCHG16_DAG: + return "X86ISD::LCMPXCHG16_DAG"; case X86ISD::LCMPXCHG8_SAVE_EBX_DAG: return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG"; case X86ISD::LCMPXCHG16_SAVE_RBX_DAG: return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG"; - case X86ISD::LADD: return "X86ISD::LADD"; - case X86ISD::LSUB: return "X86ISD::LSUB"; - case X86ISD::LOR: return "X86ISD::LOR"; - case X86ISD::LXOR: return "X86ISD::LXOR"; - case X86ISD::LAND: return "X86ISD::LAND"; - case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; - case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; - case X86ISD::VEXTRACT_STORE: return "X86ISD::VEXTRACT_STORE"; - case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; - case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; - case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; - case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC"; - case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS"; - case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS"; - case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES"; - case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS"; - case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; - case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; - case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; - case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE"; - case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS"; - case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE"; - case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; - case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; - case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; - case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS"; - case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; - case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; - case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; - case X86ISD::VSHL: return "X86ISD::VSHL"; - case X86ISD::VSRL: return "X86ISD::VSRL"; - case X86ISD::VSRA: return "X86ISD::VSRA"; - case X86ISD::VSHLI: return "X86ISD::VSHLI"; - case X86ISD::VSRLI: return "X86ISD::VSRLI"; - case X86ISD::VSRAI: return "X86ISD::VSRAI"; - case X86ISD::VSHLV: return "X86ISD::VSHLV"; - case X86ISD::VSRLV: return "X86ISD::VSRLV"; - case X86ISD::VSRAV: return "X86ISD::VSRAV"; - case X86ISD::VROTLI: return "X86ISD::VROTLI"; - case X86ISD::VROTRI: return 
"X86ISD::VROTRI"; - case X86ISD::VPPERM: return "X86ISD::VPPERM"; - case X86ISD::CMPP: return "X86ISD::CMPP"; - case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; - case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; - case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS"; - case X86ISD::ADD: return "X86ISD::ADD"; - case X86ISD::SUB: return "X86ISD::SUB"; - case X86ISD::ADC: return "X86ISD::ADC"; - case X86ISD::SBB: return "X86ISD::SBB"; - case X86ISD::SMUL: return "X86ISD::SMUL"; - case X86ISD::UMUL: return "X86ISD::UMUL"; - case X86ISD::OR: return "X86ISD::OR"; - case X86ISD::XOR: return "X86ISD::XOR"; - case X86ISD::AND: return "X86ISD::AND"; - case X86ISD::BEXTR: return "X86ISD::BEXTR"; - case X86ISD::BZHI: return "X86ISD::BZHI"; - case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; - case X86ISD::MOVMSK: return "X86ISD::MOVMSK"; - case X86ISD::PTEST: return "X86ISD::PTEST"; - case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::KORTEST: return "X86ISD::KORTEST"; - case X86ISD::KTEST: return "X86ISD::KTEST"; - case X86ISD::KADD: return "X86ISD::KADD"; - case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL"; - case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR"; - case X86ISD::PACKSS: return "X86ISD::PACKSS"; - case X86ISD::PACKUS: return "X86ISD::PACKUS"; - case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; - case X86ISD::VALIGN: return "X86ISD::VALIGN"; - case X86ISD::VSHLD: return "X86ISD::VSHLD"; - case X86ISD::VSHRD: return "X86ISD::VSHRD"; - case X86ISD::VSHLDV: return "X86ISD::VSHLDV"; - case X86ISD::VSHRDV: return "X86ISD::VSHRDV"; - case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; - case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; - case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; - case X86ISD::SHUFP: return "X86ISD::SHUFP"; - case X86ISD::SHUF128: return "X86ISD::SHUF128"; - case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; - case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; - case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; - case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; - case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; - case X86ISD::MOVSD: return "X86ISD::MOVSD"; - case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; - case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; - case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; - case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; - case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; - case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; - case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; - case X86ISD::VPERMV: return "X86ISD::VPERMV"; - case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; - case X86ISD::VPERMI: return "X86ISD::VPERMI"; - case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; - case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; - case X86ISD::VFIXUPIMM_SAE: return "X86ISD::VFIXUPIMM_SAE"; - case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS"; - case X86ISD::VFIXUPIMMS_SAE: return "X86ISD::VFIXUPIMMS_SAE"; - case X86ISD::VRANGE: return "X86ISD::VRANGE"; - case X86ISD::VRANGE_SAE: return "X86ISD::VRANGE_SAE"; - case X86ISD::VRANGES: return "X86ISD::VRANGES"; - case X86ISD::VRANGES_SAE: return "X86ISD::VRANGES_SAE"; - case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; - case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; - case X86ISD::PSADBW: return "X86ISD::PSADBW"; - case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW"; - case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; - case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; - case 
X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; - case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; - case X86ISD::MFENCE: return "X86ISD::MFENCE"; - case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; - case X86ISD::SAHF: return "X86ISD::SAHF"; - case X86ISD::RDRAND: return "X86ISD::RDRAND"; - case X86ISD::RDSEED: return "X86ISD::RDSEED"; - case X86ISD::RDPKRU: return "X86ISD::RDPKRU"; - case X86ISD::WRPKRU: return "X86ISD::WRPKRU"; - case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW"; - case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD"; - case X86ISD::VPSHA: return "X86ISD::VPSHA"; - case X86ISD::VPSHL: return "X86ISD::VPSHL"; - case X86ISD::VPCOM: return "X86ISD::VPCOM"; - case X86ISD::VPCOMU: return "X86ISD::VPCOMU"; - case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2"; - case X86ISD::FMSUB: return "X86ISD::FMSUB"; - case X86ISD::FNMADD: return "X86ISD::FNMADD"; - case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; - case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; - case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; - case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND"; - case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND"; - case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND"; - case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; - case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; - case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; - case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H"; - case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L"; - case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; - case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE"; - case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES"; - case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE"; - case X86ISD::VREDUCE: return "X86ISD::VREDUCE"; - case X86ISD::VREDUCE_SAE: return "X86ISD::VREDUCE_SAE"; - case X86ISD::VREDUCES: return "X86ISD::VREDUCES"; - case X86ISD::VREDUCES_SAE: return "X86ISD::VREDUCES_SAE"; - case X86ISD::VGETMANT: return "X86ISD::VGETMANT"; - case X86ISD::VGETMANT_SAE: return "X86ISD::VGETMANT_SAE"; - case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS"; - case X86ISD::VGETMANTS_SAE: return "X86ISD::VGETMANTS_SAE"; - case X86ISD::PCMPESTR: return "X86ISD::PCMPESTR"; - case X86ISD::PCMPISTR: return "X86ISD::PCMPISTR"; - case X86ISD::XTEST: return "X86ISD::XTEST"; - case X86ISD::COMPRESS: return "X86ISD::COMPRESS"; - case X86ISD::EXPAND: return "X86ISD::EXPAND"; - case X86ISD::SELECTS: return "X86ISD::SELECTS"; - case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; - case X86ISD::RCP14: return "X86ISD::RCP14"; - case X86ISD::RCP14S: return "X86ISD::RCP14S"; - case X86ISD::RCP28: return "X86ISD::RCP28"; - case X86ISD::RCP28_SAE: return "X86ISD::RCP28_SAE"; - case X86ISD::RCP28S: return "X86ISD::RCP28S"; - case X86ISD::RCP28S_SAE: return "X86ISD::RCP28S_SAE"; - case X86ISD::EXP2: return "X86ISD::EXP2"; - case X86ISD::EXP2_SAE: return "X86ISD::EXP2_SAE"; - case X86ISD::RSQRT14: return "X86ISD::RSQRT14"; - case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S"; - case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; - case X86ISD::RSQRT28_SAE: return "X86ISD::RSQRT28_SAE"; - case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; - case X86ISD::RSQRT28S_SAE: return "X86ISD::RSQRT28S_SAE"; - case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; - case X86ISD::FADDS: return "X86ISD::FADDS"; - case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; - case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; - case X86ISD::FSUBS: return "X86ISD::FSUBS"; - case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND"; - case 
X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; - case X86ISD::FMULS: return "X86ISD::FMULS"; - case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND"; - case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; - case X86ISD::FDIVS: return "X86ISD::FDIVS"; - case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND"; - case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; - case X86ISD::FSQRTS: return "X86ISD::FSQRTS"; - case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; - case X86ISD::FGETEXP: return "X86ISD::FGETEXP"; - case X86ISD::FGETEXP_SAE: return "X86ISD::FGETEXP_SAE"; - case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS"; - case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE"; - case X86ISD::SCALEF: return "X86ISD::SCALEF"; - case X86ISD::SCALEF_RND: return "X86ISD::SCALEF_RND"; - case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; - case X86ISD::SCALEFS_RND: return "X86ISD::SCALEFS_RND"; - case X86ISD::AVG: return "X86ISD::AVG"; - case X86ISD::MULHRS: return "X86ISD::MULHRS"; - case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; - case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; - case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI"; - case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; - case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; - case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; - case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE"; - case X86ISD::CVTTP2UI_SAE: return "X86ISD::CVTTP2UI_SAE"; - case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI"; - case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI"; - case X86ISD::CVTTS2SI_SAE: return "X86ISD::CVTTS2SI_SAE"; - case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE"; - case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P"; - case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P"; - case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P"; - case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P"; - case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; - case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; - case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT"; - case X86ISD::SCALAR_SINT_TO_FP: return "X86ISD::SCALAR_SINT_TO_FP"; - case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND"; - case X86ISD::SCALAR_UINT_TO_FP: return "X86ISD::SCALAR_UINT_TO_FP"; - case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND"; - case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH"; - case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH"; - case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS"; - case X86ISD::CVTPH2PS_SAE: return "X86ISD::CVTPH2PS_SAE"; - case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI"; - case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI"; - case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI"; - case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI"; - case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND"; - case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; - case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI"; - case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI"; - case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; - case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; - case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16"; - case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16"; - case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16"; - case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS"; - case X86ISD::LWPINS: return "X86ISD::LWPINS"; - case X86ISD::MGATHER: return "X86ISD::MGATHER"; - case X86ISD::MSCATTER: return "X86ISD::MSCATTER"; - case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD"; - case X86ISD::VPDPBUSDS: return 
"X86ISD::VPDPBUSDS"; - case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; - case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; - case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB"; - case X86ISD::GF2P8MULB: return "X86ISD::GF2P8MULB"; - case X86ISD::GF2P8AFFINEQB: return "X86ISD::GF2P8AFFINEQB"; - case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB"; - case X86ISD::NT_CALL: return "X86ISD::NT_CALL"; - case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND"; - case X86ISD::UMWAIT: return "X86ISD::UMWAIT"; - case X86ISD::TPAUSE: return "X86ISD::TPAUSE"; - case X86ISD::ENQCMD: return "X86ISD:ENQCMD"; - case X86ISD::ENQCMDS: return "X86ISD:ENQCMDS"; - case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT"; + case X86ISD::LADD: + return "X86ISD::LADD"; + case X86ISD::LSUB: + return "X86ISD::LSUB"; + case X86ISD::LOR: + return "X86ISD::LOR"; + case X86ISD::LXOR: + return "X86ISD::LXOR"; + case X86ISD::LAND: + return "X86ISD::LAND"; + case X86ISD::VZEXT_MOVL: + return "X86ISD::VZEXT_MOVL"; + case X86ISD::VZEXT_LOAD: + return "X86ISD::VZEXT_LOAD"; + case X86ISD::VEXTRACT_STORE: + return "X86ISD::VEXTRACT_STORE"; + case X86ISD::VTRUNC: + return "X86ISD::VTRUNC"; + case X86ISD::VTRUNCS: + return "X86ISD::VTRUNCS"; + case X86ISD::VTRUNCUS: + return "X86ISD::VTRUNCUS"; + case X86ISD::VMTRUNC: + return "X86ISD::VMTRUNC"; + case X86ISD::VMTRUNCS: + return "X86ISD::VMTRUNCS"; + case X86ISD::VMTRUNCUS: + return "X86ISD::VMTRUNCUS"; + case X86ISD::VTRUNCSTORES: + return "X86ISD::VTRUNCSTORES"; + case X86ISD::VTRUNCSTOREUS: + return "X86ISD::VTRUNCSTOREUS"; + case X86ISD::VMTRUNCSTORES: + return "X86ISD::VMTRUNCSTORES"; + case X86ISD::VMTRUNCSTOREUS: + return "X86ISD::VMTRUNCSTOREUS"; + case X86ISD::VFPEXT: + return "X86ISD::VFPEXT"; + case X86ISD::VFPEXT_SAE: + return "X86ISD::VFPEXT_SAE"; + case X86ISD::VFPEXTS: + return "X86ISD::VFPEXTS"; + case X86ISD::VFPEXTS_SAE: + return "X86ISD::VFPEXTS_SAE"; + case X86ISD::VFPROUND: + return "X86ISD::VFPROUND"; + case X86ISD::VMFPROUND: + return "X86ISD::VMFPROUND"; + case X86ISD::VFPROUND_RND: + return "X86ISD::VFPROUND_RND"; + case X86ISD::VFPROUNDS: + return "X86ISD::VFPROUNDS"; + case X86ISD::VFPROUNDS_RND: + return "X86ISD::VFPROUNDS_RND"; + case X86ISD::VSHLDQ: + return "X86ISD::VSHLDQ"; + case X86ISD::VSRLDQ: + return "X86ISD::VSRLDQ"; + case X86ISD::VSHL: + return "X86ISD::VSHL"; + case X86ISD::VSRL: + return "X86ISD::VSRL"; + case X86ISD::VSRA: + return "X86ISD::VSRA"; + case X86ISD::VSHLI: + return "X86ISD::VSHLI"; + case X86ISD::VSRLI: + return "X86ISD::VSRLI"; + case X86ISD::VSRAI: + return "X86ISD::VSRAI"; + case X86ISD::VSHLV: + return "X86ISD::VSHLV"; + case X86ISD::VSRLV: + return "X86ISD::VSRLV"; + case X86ISD::VSRAV: + return "X86ISD::VSRAV"; + case X86ISD::VROTLI: + return "X86ISD::VROTLI"; + case X86ISD::VROTRI: + return "X86ISD::VROTRI"; + case X86ISD::VPPERM: + return "X86ISD::VPPERM"; + case X86ISD::CMPP: + return "X86ISD::CMPP"; + case X86ISD::PCMPEQ: + return "X86ISD::PCMPEQ"; + case X86ISD::PCMPGT: + return "X86ISD::PCMPGT"; + case X86ISD::PHMINPOS: + return "X86ISD::PHMINPOS"; + case X86ISD::ADD: + return "X86ISD::ADD"; + case X86ISD::SUB: + return "X86ISD::SUB"; + case X86ISD::ADC: + return "X86ISD::ADC"; + case X86ISD::SBB: + return "X86ISD::SBB"; + case X86ISD::SMUL: + return "X86ISD::SMUL"; + case X86ISD::UMUL: + return "X86ISD::UMUL"; + case X86ISD::OR: + return "X86ISD::OR"; + case X86ISD::XOR: + return "X86ISD::XOR"; + case X86ISD::AND: + return "X86ISD::AND"; + case X86ISD::BEXTR: + return "X86ISD::BEXTR"; + case 
X86ISD::BZHI: + return "X86ISD::BZHI"; + case X86ISD::MUL_IMM: + return "X86ISD::MUL_IMM"; + case X86ISD::MOVMSK: + return "X86ISD::MOVMSK"; + case X86ISD::PTEST: + return "X86ISD::PTEST"; + case X86ISD::TESTP: + return "X86ISD::TESTP"; + case X86ISD::KORTEST: + return "X86ISD::KORTEST"; + case X86ISD::KTEST: + return "X86ISD::KTEST"; + case X86ISD::KADD: + return "X86ISD::KADD"; + case X86ISD::KSHIFTL: + return "X86ISD::KSHIFTL"; + case X86ISD::KSHIFTR: + return "X86ISD::KSHIFTR"; + case X86ISD::PACKSS: + return "X86ISD::PACKSS"; + case X86ISD::PACKUS: + return "X86ISD::PACKUS"; + case X86ISD::PALIGNR: + return "X86ISD::PALIGNR"; + case X86ISD::VALIGN: + return "X86ISD::VALIGN"; + case X86ISD::VSHLD: + return "X86ISD::VSHLD"; + case X86ISD::VSHRD: + return "X86ISD::VSHRD"; + case X86ISD::VSHLDV: + return "X86ISD::VSHLDV"; + case X86ISD::VSHRDV: + return "X86ISD::VSHRDV"; + case X86ISD::PSHUFD: + return "X86ISD::PSHUFD"; + case X86ISD::PSHUFHW: + return "X86ISD::PSHUFHW"; + case X86ISD::PSHUFLW: + return "X86ISD::PSHUFLW"; + case X86ISD::SHUFP: + return "X86ISD::SHUFP"; + case X86ISD::SHUF128: + return "X86ISD::SHUF128"; + case X86ISD::MOVLHPS: + return "X86ISD::MOVLHPS"; + case X86ISD::MOVHLPS: + return "X86ISD::MOVHLPS"; + case X86ISD::MOVDDUP: + return "X86ISD::MOVDDUP"; + case X86ISD::MOVSHDUP: + return "X86ISD::MOVSHDUP"; + case X86ISD::MOVSLDUP: + return "X86ISD::MOVSLDUP"; + case X86ISD::MOVSD: + return "X86ISD::MOVSD"; + case X86ISD::MOVSS: + return "X86ISD::MOVSS"; + case X86ISD::UNPCKL: + return "X86ISD::UNPCKL"; + case X86ISD::UNPCKH: + return "X86ISD::UNPCKH"; + case X86ISD::VBROADCAST: + return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCASTM: + return "X86ISD::VBROADCASTM"; + case X86ISD::SUBV_BROADCAST: + return "X86ISD::SUBV_BROADCAST"; + case X86ISD::VPERMILPV: + return "X86ISD::VPERMILPV"; + case X86ISD::VPERMILPI: + return "X86ISD::VPERMILPI"; + case X86ISD::VPERM2X128: + return "X86ISD::VPERM2X128"; + case X86ISD::VPERMV: + return "X86ISD::VPERMV"; + case X86ISD::VPERMV3: + return "X86ISD::VPERMV3"; + case X86ISD::VPERMI: + return "X86ISD::VPERMI"; + case X86ISD::VPTERNLOG: + return "X86ISD::VPTERNLOG"; + case X86ISD::VFIXUPIMM: + return "X86ISD::VFIXUPIMM"; + case X86ISD::VFIXUPIMM_SAE: + return "X86ISD::VFIXUPIMM_SAE"; + case X86ISD::VFIXUPIMMS: + return "X86ISD::VFIXUPIMMS"; + case X86ISD::VFIXUPIMMS_SAE: + return "X86ISD::VFIXUPIMMS_SAE"; + case X86ISD::VRANGE: + return "X86ISD::VRANGE"; + case X86ISD::VRANGE_SAE: + return "X86ISD::VRANGE_SAE"; + case X86ISD::VRANGES: + return "X86ISD::VRANGES"; + case X86ISD::VRANGES_SAE: + return "X86ISD::VRANGES_SAE"; + case X86ISD::PMULUDQ: + return "X86ISD::PMULUDQ"; + case X86ISD::PMULDQ: + return "X86ISD::PMULDQ"; + case X86ISD::PSADBW: + return "X86ISD::PSADBW"; + case X86ISD::DBPSADBW: + return "X86ISD::DBPSADBW"; + case X86ISD::VASTART_SAVE_XMM_REGS: + return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::VAARG_64: + return "X86ISD::VAARG_64"; + case X86ISD::WIN_ALLOCA: + return "X86ISD::WIN_ALLOCA"; + case X86ISD::MEMBARRIER: + return "X86ISD::MEMBARRIER"; + case X86ISD::MFENCE: + return "X86ISD::MFENCE"; + case X86ISD::SEG_ALLOCA: + return "X86ISD::SEG_ALLOCA"; + case X86ISD::SAHF: + return "X86ISD::SAHF"; + case X86ISD::RDRAND: + return "X86ISD::RDRAND"; + case X86ISD::RDSEED: + return "X86ISD::RDSEED"; + case X86ISD::RDPKRU: + return "X86ISD::RDPKRU"; + case X86ISD::WRPKRU: + return "X86ISD::WRPKRU"; + case X86ISD::VPMADDUBSW: + return "X86ISD::VPMADDUBSW"; + case X86ISD::VPMADDWD: + return "X86ISD::VPMADDWD"; + case 
X86ISD::VPSHA: + return "X86ISD::VPSHA"; + case X86ISD::VPSHL: + return "X86ISD::VPSHL"; + case X86ISD::VPCOM: + return "X86ISD::VPCOM"; + case X86ISD::VPCOMU: + return "X86ISD::VPCOMU"; + case X86ISD::VPERMIL2: + return "X86ISD::VPERMIL2"; + case X86ISD::FMSUB: + return "X86ISD::FMSUB"; + case X86ISD::FNMADD: + return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: + return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: + return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: + return "X86ISD::FMSUBADD"; + case X86ISD::FMADD_RND: + return "X86ISD::FMADD_RND"; + case X86ISD::FNMADD_RND: + return "X86ISD::FNMADD_RND"; + case X86ISD::FMSUB_RND: + return "X86ISD::FMSUB_RND"; + case X86ISD::FNMSUB_RND: + return "X86ISD::FNMSUB_RND"; + case X86ISD::FMADDSUB_RND: + return "X86ISD::FMADDSUB_RND"; + case X86ISD::FMSUBADD_RND: + return "X86ISD::FMSUBADD_RND"; + case X86ISD::VPMADD52H: + return "X86ISD::VPMADD52H"; + case X86ISD::VPMADD52L: + return "X86ISD::VPMADD52L"; + case X86ISD::VRNDSCALE: + return "X86ISD::VRNDSCALE"; + case X86ISD::VRNDSCALE_SAE: + return "X86ISD::VRNDSCALE_SAE"; + case X86ISD::VRNDSCALES: + return "X86ISD::VRNDSCALES"; + case X86ISD::VRNDSCALES_SAE: + return "X86ISD::VRNDSCALES_SAE"; + case X86ISD::VREDUCE: + return "X86ISD::VREDUCE"; + case X86ISD::VREDUCE_SAE: + return "X86ISD::VREDUCE_SAE"; + case X86ISD::VREDUCES: + return "X86ISD::VREDUCES"; + case X86ISD::VREDUCES_SAE: + return "X86ISD::VREDUCES_SAE"; + case X86ISD::VGETMANT: + return "X86ISD::VGETMANT"; + case X86ISD::VGETMANT_SAE: + return "X86ISD::VGETMANT_SAE"; + case X86ISD::VGETMANTS: + return "X86ISD::VGETMANTS"; + case X86ISD::VGETMANTS_SAE: + return "X86ISD::VGETMANTS_SAE"; + case X86ISD::PCMPESTR: + return "X86ISD::PCMPESTR"; + case X86ISD::PCMPISTR: + return "X86ISD::PCMPISTR"; + case X86ISD::XTEST: + return "X86ISD::XTEST"; + case X86ISD::COMPRESS: + return "X86ISD::COMPRESS"; + case X86ISD::EXPAND: + return "X86ISD::EXPAND"; + case X86ISD::SELECTS: + return "X86ISD::SELECTS"; + case X86ISD::ADDSUB: + return "X86ISD::ADDSUB"; + case X86ISD::RCP14: + return "X86ISD::RCP14"; + case X86ISD::RCP14S: + return "X86ISD::RCP14S"; + case X86ISD::RCP28: + return "X86ISD::RCP28"; + case X86ISD::RCP28_SAE: + return "X86ISD::RCP28_SAE"; + case X86ISD::RCP28S: + return "X86ISD::RCP28S"; + case X86ISD::RCP28S_SAE: + return "X86ISD::RCP28S_SAE"; + case X86ISD::EXP2: + return "X86ISD::EXP2"; + case X86ISD::EXP2_SAE: + return "X86ISD::EXP2_SAE"; + case X86ISD::RSQRT14: + return "X86ISD::RSQRT14"; + case X86ISD::RSQRT14S: + return "X86ISD::RSQRT14S"; + case X86ISD::RSQRT28: + return "X86ISD::RSQRT28"; + case X86ISD::RSQRT28_SAE: + return "X86ISD::RSQRT28_SAE"; + case X86ISD::RSQRT28S: + return "X86ISD::RSQRT28S"; + case X86ISD::RSQRT28S_SAE: + return "X86ISD::RSQRT28S_SAE"; + case X86ISD::FADD_RND: + return "X86ISD::FADD_RND"; + case X86ISD::FADDS: + return "X86ISD::FADDS"; + case X86ISD::FADDS_RND: + return "X86ISD::FADDS_RND"; + case X86ISD::FSUB_RND: + return "X86ISD::FSUB_RND"; + case X86ISD::FSUBS: + return "X86ISD::FSUBS"; + case X86ISD::FSUBS_RND: + return "X86ISD::FSUBS_RND"; + case X86ISD::FMUL_RND: + return "X86ISD::FMUL_RND"; + case X86ISD::FMULS: + return "X86ISD::FMULS"; + case X86ISD::FMULS_RND: + return "X86ISD::FMULS_RND"; + case X86ISD::FDIV_RND: + return "X86ISD::FDIV_RND"; + case X86ISD::FDIVS: + return "X86ISD::FDIVS"; + case X86ISD::FDIVS_RND: + return "X86ISD::FDIVS_RND"; + case X86ISD::FSQRT_RND: + return "X86ISD::FSQRT_RND"; + case X86ISD::FSQRTS: + return "X86ISD::FSQRTS"; + case X86ISD::FSQRTS_RND: + return 
"X86ISD::FSQRTS_RND"; + case X86ISD::FGETEXP: + return "X86ISD::FGETEXP"; + case X86ISD::FGETEXP_SAE: + return "X86ISD::FGETEXP_SAE"; + case X86ISD::FGETEXPS: + return "X86ISD::FGETEXPS"; + case X86ISD::FGETEXPS_SAE: + return "X86ISD::FGETEXPS_SAE"; + case X86ISD::SCALEF: + return "X86ISD::SCALEF"; + case X86ISD::SCALEF_RND: + return "X86ISD::SCALEF_RND"; + case X86ISD::SCALEFS: + return "X86ISD::SCALEFS"; + case X86ISD::SCALEFS_RND: + return "X86ISD::SCALEFS_RND"; + case X86ISD::AVG: + return "X86ISD::AVG"; + case X86ISD::MULHRS: + return "X86ISD::MULHRS"; + case X86ISD::SINT_TO_FP_RND: + return "X86ISD::SINT_TO_FP_RND"; + case X86ISD::UINT_TO_FP_RND: + return "X86ISD::UINT_TO_FP_RND"; + case X86ISD::CVTTP2SI: + return "X86ISD::CVTTP2SI"; + case X86ISD::CVTTP2UI: + return "X86ISD::CVTTP2UI"; + case X86ISD::MCVTTP2SI: + return "X86ISD::MCVTTP2SI"; + case X86ISD::MCVTTP2UI: + return "X86ISD::MCVTTP2UI"; + case X86ISD::CVTTP2SI_SAE: + return "X86ISD::CVTTP2SI_SAE"; + case X86ISD::CVTTP2UI_SAE: + return "X86ISD::CVTTP2UI_SAE"; + case X86ISD::CVTTS2SI: + return "X86ISD::CVTTS2SI"; + case X86ISD::CVTTS2UI: + return "X86ISD::CVTTS2UI"; + case X86ISD::CVTTS2SI_SAE: + return "X86ISD::CVTTS2SI_SAE"; + case X86ISD::CVTTS2UI_SAE: + return "X86ISD::CVTTS2UI_SAE"; + case X86ISD::CVTSI2P: + return "X86ISD::CVTSI2P"; + case X86ISD::CVTUI2P: + return "X86ISD::CVTUI2P"; + case X86ISD::MCVTSI2P: + return "X86ISD::MCVTSI2P"; + case X86ISD::MCVTUI2P: + return "X86ISD::MCVTUI2P"; + case X86ISD::VFPCLASS: + return "X86ISD::VFPCLASS"; + case X86ISD::VFPCLASSS: + return "X86ISD::VFPCLASSS"; + case X86ISD::MULTISHIFT: + return "X86ISD::MULTISHIFT"; + case X86ISD::SCALAR_SINT_TO_FP: + return "X86ISD::SCALAR_SINT_TO_FP"; + case X86ISD::SCALAR_SINT_TO_FP_RND: + return "X86ISD::SCALAR_SINT_TO_FP_RND"; + case X86ISD::SCALAR_UINT_TO_FP: + return "X86ISD::SCALAR_UINT_TO_FP"; + case X86ISD::SCALAR_UINT_TO_FP_RND: + return "X86ISD::SCALAR_UINT_TO_FP_RND"; + case X86ISD::CVTPS2PH: + return "X86ISD::CVTPS2PH"; + case X86ISD::MCVTPS2PH: + return "X86ISD::MCVTPS2PH"; + case X86ISD::CVTPH2PS: + return "X86ISD::CVTPH2PS"; + case X86ISD::CVTPH2PS_SAE: + return "X86ISD::CVTPH2PS_SAE"; + case X86ISD::CVTP2SI: + return "X86ISD::CVTP2SI"; + case X86ISD::CVTP2UI: + return "X86ISD::CVTP2UI"; + case X86ISD::MCVTP2SI: + return "X86ISD::MCVTP2SI"; + case X86ISD::MCVTP2UI: + return "X86ISD::MCVTP2UI"; + case X86ISD::CVTP2SI_RND: + return "X86ISD::CVTP2SI_RND"; + case X86ISD::CVTP2UI_RND: + return "X86ISD::CVTP2UI_RND"; + case X86ISD::CVTS2SI: + return "X86ISD::CVTS2SI"; + case X86ISD::CVTS2UI: + return "X86ISD::CVTS2UI"; + case X86ISD::CVTS2SI_RND: + return "X86ISD::CVTS2SI_RND"; + case X86ISD::CVTS2UI_RND: + return "X86ISD::CVTS2UI_RND"; + case X86ISD::CVTNE2PS2BF16: + return "X86ISD::CVTNE2PS2BF16"; + case X86ISD::CVTNEPS2BF16: + return "X86ISD::CVTNEPS2BF16"; + case X86ISD::MCVTNEPS2BF16: + return "X86ISD::MCVTNEPS2BF16"; + case X86ISD::DPBF16PS: + return "X86ISD::DPBF16PS"; + case X86ISD::LWPINS: + return "X86ISD::LWPINS"; + case X86ISD::MGATHER: + return "X86ISD::MGATHER"; + case X86ISD::MSCATTER: + return "X86ISD::MSCATTER"; + case X86ISD::VPDPBUSD: + return "X86ISD::VPDPBUSD"; + case X86ISD::VPDPBUSDS: + return "X86ISD::VPDPBUSDS"; + case X86ISD::VPDPWSSD: + return "X86ISD::VPDPWSSD"; + case X86ISD::VPDPWSSDS: + return "X86ISD::VPDPWSSDS"; + case X86ISD::VPSHUFBITQMB: + return "X86ISD::VPSHUFBITQMB"; + case X86ISD::GF2P8MULB: + return "X86ISD::GF2P8MULB"; + case X86ISD::GF2P8AFFINEQB: + return "X86ISD::GF2P8AFFINEQB"; + case 
X86ISD::GF2P8AFFINEINVQB: + return "X86ISD::GF2P8AFFINEINVQB"; + case X86ISD::NT_CALL: + return "X86ISD::NT_CALL"; + case X86ISD::NT_BRIND: + return "X86ISD::NT_BRIND"; + case X86ISD::UMWAIT: + return "X86ISD::UMWAIT"; + case X86ISD::TPAUSE: + return "X86ISD::TPAUSE"; + case X86ISD::ENQCMD: + return "X86ISD:ENQCMD"; + case X86ISD::ENQCMDS: + return "X86ISD:ENQCMDS"; + case X86ISD::VP2INTERSECT: + return "X86ISD::VP2INTERSECT"; } return nullptr; } @@ -28404,7 +28925,7 @@ if (AM.HasBaseReg) return false; break; - default: // Other stuff never works. + default: // Other stuff never works. return false; } @@ -28431,7 +28952,7 @@ // AVX512BW has shifts such as vpsllvw. if (Subtarget.hasBWI() && Bits == 16) - return false; + return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a // fully general vector. @@ -28526,12 +29047,13 @@ if (Val.getOpcode() != ISD::LOAD) return false; - if (!VT1.isSimple() || !VT1.isInteger() || - !VT2.isSimple() || !VT2.isInteger()) + if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() || + !VT2.isInteger()) return false; switch (VT1.getSimpleVT().SimpleTy) { - default: break; + default: + break; case MVT::i8: case MVT::i16: case MVT::i32: @@ -28552,8 +29074,7 @@ return true; } -bool -X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { if (!Subtarget.hasAnyFMA()) return false; @@ -28693,15 +29214,15 @@ // sinkMBB: // DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB) BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) - .addReg(mainDstReg).addMBB(mainMBB) - .addReg(fallDstReg).addMBB(fallMBB); + .addReg(mainDstReg) + .addMBB(mainMBB) + .addReg(fallDstReg) + .addMBB(fallMBB); MI.eraseFromParent(); return sinkMBB; } - - MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -28754,8 +29275,8 @@ unsigned TotalNumXMMRegs = 8; bool UseGPOffset = (ArgMode == 1); bool UseFPOffset = (ArgMode == 2); - unsigned MaxOffset = TotalNumIntRegs * 8 + - (UseFPOffset ? TotalNumXMMRegs * 16 : 0); + unsigned MaxOffset = + TotalNumIntRegs * 8 + (UseFPOffset ? TotalNumXMMRegs * 16 : 0); /* Align ArgSize to a multiple of 8 */ unsigned ArgSizeA8 = (ArgSize + 7) & ~7; @@ -28766,8 +29287,8 @@ MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; - unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB - unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB + unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; if (!UseGPOffset && !UseFPOffset) { @@ -28834,13 +29355,14 @@ // Check if there is enough room left to pull this argument. BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) - .addReg(OffsetReg) - .addImm(MaxOffset + 8 - ArgSizeA8); + .addReg(OffsetReg) + .addImm(MaxOffset + 8 - ArgSizeA8); // Branch to "overflowMBB" if offset >= max // Fall through to "offsetMBB" otherwise BuildMI(thisMBB, DL, TII->get(X86::JCC_1)) - .addMBB(overflowMBB).addImm(X86::COND_AE); + .addMBB(overflowMBB) + .addImm(X86::COND_AE); } // In offsetMBB, emit code to use the reg_save_area. 
@@ -28859,21 +29381,21 @@ // Zero-extend the offset unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); // Add the offset to the reg_save_area to get the final address. BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) - .addReg(OffsetReg64) - .addReg(RegSaveReg); + .addReg(OffsetReg64) + .addReg(RegSaveReg); // Compute the offset for the next argument unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) - .addReg(OffsetReg) - .addImm(UseFPOffset ? 16 : 8); + .addReg(OffsetReg) + .addImm(UseFPOffset ? 16 : 8); // Store it back into the va_list. BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) @@ -28886,8 +29408,7 @@ .setMemRefs(MMOs); // Jump to endMBB - BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) - .addMBB(endMBB); + BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)).addMBB(endMBB); } // @@ -28913,23 +29434,23 @@ // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) - .addReg(OverflowAddrReg) - .addImm(Align-1); + .addReg(OverflowAddrReg) + .addImm(Align - 1); BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) - .addReg(TmpReg) - .addImm(~(uint64_t)(Align-1)); + .addReg(TmpReg) + .addImm(~(uint64_t)(Align - 1)); } else { BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) - .addReg(OverflowAddrReg); + .addReg(OverflowAddrReg); } // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) - .addReg(OverflowDestReg) - .addImm(ArgSizeA8); + .addReg(OverflowDestReg) + .addImm(ArgSizeA8); // Store the new overflow address. BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) @@ -28943,10 +29464,11 @@ // If we branched, emit the PHI to the front of endMBB. if (offsetMBB) { - BuildMI(*endMBB, endMBB->begin(), DL, - TII->get(X86::PHI), DestReg) - .addReg(OffsetDestReg).addMBB(offsetMBB) - .addReg(OverflowDestReg).addMBB(overflowMBB); + BuildMI(*endMBB, endMBB->begin(), DL, TII->get(X86::PHI), DestReg) + .addReg(OffsetDestReg) + .addMBB(offsetMBB) + .addReg(OverflowDestReg) + .addMBB(overflowMBB); } // Erase the pseudo instruction @@ -29036,12 +29558,12 @@ // kill marker, and set it if it should. Returns the correct kill // marker value. static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, - MachineBasicBlock* BB, - const TargetRegisterInfo* TRI) { + MachineBasicBlock *BB, + const TargetRegisterInfo *TRI) { // Scan forward through BB for a use/def of EFLAGS. 
MachineBasicBlock::iterator miI(std::next(SelectItr)); for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { - const MachineInstr& mi = *miI; + const MachineInstr &mi = *miI; if (mi.readsRegister(X86::EFLAGS)) return false; if (mi.definesRegister(X86::EFLAGS)) @@ -29054,7 +29576,7 @@ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), sEnd = BB->succ_end(); sItr != sEnd; ++sItr) { - MachineBasicBlock* succ = *sItr; + MachineBasicBlock *succ = *sItr; if (succ->isLiveIn(X86::EFLAGS)) return false; } @@ -29281,7 +29803,9 @@ X86::CondCode SecondCC = X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm()); - BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC); + BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)) + .addMBB(SinkMBB) + .addImm(SecondCC); // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] @@ -29514,56 +30038,61 @@ // Add code to the main basic block to check if the stack limit has been hit, // and if so, jump to mallocMBB otherwise to bumpMBB. BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); - BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) - .addReg(tmpSPVReg).addReg(sizeVReg); - BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr)) - .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) - .addReg(SPLimitVReg); + BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr), SPLimitVReg) + .addReg(tmpSPVReg) + .addReg(sizeVReg); + BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr : X86::CMP32mr)) + .addReg(0) + .addImm(1) + .addReg(0) + .addImm(TlsOffset) + .addReg(TlsReg) + .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G); // bumpMBB simply decreases the stack pointer, since we know the current // stacklet has enough space. BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) - .addReg(SPLimitVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) - .addReg(SPLimitVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. 
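The segmented-stack lowering above boils down to one comparison: subtract the requested size from the stack pointer and compare the result against the stacklet limit kept in thread-local storage; the bumpMBB path just commits the new stack pointer, while the mallocMBB path emitted below calls libgcc's __morestack_allocate_stack_space. A hedged C sketch of that control flow (the parameters stand in for the physical SP and the TLS limit slot, they are not names from the patch):

  #include <stddef.h>

  extern void *__morestack_allocate_stack_space(size_t size);

  static char *alloca_segmented(char *sp, char *tls_stack_limit, size_t size,
                                char **new_sp_out) {
    char *new_sp = sp - size;                  // SUB64rr / SUB32rr
    if (tls_stack_limit > new_sp)              // CMP64mr/CMP32mr + JCC COND_G
      return (char *)__morestack_allocate_stack_space(size);  // mallocMBB
    *new_sp_out = new_sp;                      // bumpMBB: commit the new SP
    return new_sp;
  }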
const uint32_t *RegMask = Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { - BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) - .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::RDI, RegState::Implicit) - .addReg(X86::RAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::RDI, RegState::Implicit) + .addReg(X86::RAX, RegState::ImplicitDefine); } else if (Is64Bit) { - BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI) - .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::EDI, RegState::Implicit) - .addReg(X86::EAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::EDI, RegState::Implicit) + .addReg(X86::EAX, RegState::ImplicitDefine); } else { - BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) - .addImm(12); + BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg) + .addReg(physSPReg) + .addImm(12); BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space") - .addRegMask(RegMask) - .addReg(X86::EAX, RegState::ImplicitDefine); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::EAX, RegState::ImplicitDefine); } if (!Is64Bit) - BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) - .addImm(16); + BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg) + .addReg(physSPReg) + .addImm(16); BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) - .addReg(IsLP64 ? X86::RAX : X86::EAX); + .addReg(IsLP64 ? X86::RAX : X86::EAX); BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Set up the CFG correctly. @@ -29650,7 +30179,7 @@ // Emit CALLSEQ_START right before the instruction. unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); MachineInstrBuilder CallseqStart = - BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); + BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); // Emit CALLSEQ_END right after the instruction. @@ -29658,7 +30187,7 @@ // original instruction around. unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); MachineInstrBuilder CallseqEnd = - BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); + BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); return BB; @@ -29682,9 +30211,10 @@ // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = - Subtarget.is64Bit() ? - Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : - Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); + Subtarget.is64Bit() + ? 
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() + : Subtarget.getRegisterInfo()->getCallPreservedMask(*F, + CallingConv::C); if (Subtarget.is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) @@ -29936,8 +30466,7 @@ MemOpndSlot = CurOp; MVT PVT = getPointerTy(MF->getDataLayout()); - assert((PVT == MVT::i64 || PVT == MVT::i32) && - "Invalid Pointer Size!"); + assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // @@ -29985,19 +30514,19 @@ LabelReg = MRI.createVirtualRegister(PtrRC); if (Subtarget.is64Bit()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) - .addReg(X86::RIP) - .addImm(0) - .addReg(0) - .addMBB(restoreMBB) - .addReg(0); + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB) + .addReg(0); } else { - const X86InstrInfo *XII = static_cast(TII); + const X86InstrInfo *XII = static_cast(TII); MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) - .addReg(XII->getGlobalBaseReg(MF)) - .addImm(0) - .addReg(0) - .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) - .addReg(0); + .addReg(XII->getGlobalBaseReg(MF)) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) + .addReg(0); } } else PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; @@ -30021,7 +30550,7 @@ // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) - .addMBB(restoreMBB); + .addMBB(restoreMBB); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MIB.addRegMask(RegInfo->getNoPreservedMask()); @@ -30034,10 +30563,11 @@ mainMBB->addSuccessor(sinkMBB); // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(X86::PHI), DstReg) - .addReg(mainDstReg).addMBB(mainMBB) - .addReg(restoreDstReg).addMBB(restoreMBB); + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) + .addReg(mainDstReg) + .addMBB(mainMBB) + .addReg(restoreDstReg) + .addMBB(restoreMBB); // restoreMBB: if (RegInfo->hasBasePointer(*MF)) { @@ -30048,9 +30578,9 @@ unsigned FramePtr = RegInfo->getFrameRegister(*MF); unsigned BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; - addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), - FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .setMIFlag(MachineInstr::FrameSetup); + addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, + true, X86FI->getRestoreBasePointerOffset()) + .setMIFlag(MachineInstr::FrameSetup); } BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); @@ -30146,7 +30676,9 @@ BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) .addReg(SSPCopyReg) .addReg(SSPCopyReg); - BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); + BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_E); checkSspMBB->addSuccessor(sinkMBB); checkSspMBB->addSuccessor(fallMBB); @@ -30176,7 +30708,9 @@ .addReg(SSPCopyReg); // Jump to sink in case PrevSSPReg <= SSPCopyReg. - BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE); + BuildMI(fallMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_BE); fallMBB->addSuccessor(sinkMBB); fallMBB->addSuccessor(fixShadowMBB); @@ -30199,7 +30733,9 @@ .addImm(8); // Jump if the result of the shift is zero. 
- BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); + BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)) + .addMBB(sinkMBB) + .addImm(X86::COND_E); fixShadowMBB->addSuccessor(sinkMBB); fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); @@ -30234,7 +30770,9 @@ BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); // Jump if the counter is not zero yet. - BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE); + BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)) + .addMBB(fixShadowLoopMBB) + .addImm(X86::COND_NE); fixShadowLoopMBB->addSuccessor(sinkMBB); fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); @@ -30254,11 +30792,10 @@ MI.memoperands_end()); MVT PVT = getPointerTy(MF->getDataLayout()); - assert((PVT == MVT::i64 || PVT == MVT::i32) && - "Invalid Pointer Size!"); + assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = - (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; + (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -30479,7 +31016,9 @@ BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) .addReg(IReg) .addImm(LPadList.size()); - BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); + BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)) + .addMBB(TrapBB) + .addImm(X86::COND_AE); if (Subtarget.is64Bit()) { unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); @@ -30610,7 +31149,8 @@ DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { - default: llvm_unreachable("Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_base_addr32: @@ -30701,50 +31241,68 @@ // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. int OrigCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FNSTCW16m)), OrigCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), + OrigCWFrameIdx); // Load the old value of the control word... - unsigned OldCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + unsigned OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW), OrigCWFrameIdx); // OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero. - unsigned NewCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + unsigned NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW) - .addReg(OldCW, RegState::Kill).addImm(0xC00); + .addReg(OldCW, RegState::Kill) + .addImm(0xC00); // Extract to 16 bits. unsigned NewCW16 = - MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); + MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16) - .addReg(NewCW, RegState::Kill, X86::sub_16bit); + .addReg(NewCW, RegState::Kill, X86::sub_16bit); // Prepare memory for FLDCW. 
int NewCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), NewCWFrameIdx) - .addReg(NewCW16, RegState::Kill); + .addReg(NewCW16, RegState::Kill); // Reload the modified control word now... - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FLDCW16m)), NewCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), + NewCWFrameIdx); // Get the X86 opcode to use. unsigned Opc; switch (MI.getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; - case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; - case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; - case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; - case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; - case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; - case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; - case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; - case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; + default: + llvm_unreachable("illegal opcode!"); + case X86::FP32_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m32; + break; + case X86::FP32_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m32; + break; + case X86::FP32_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m32; + break; + case X86::FP64_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m64; + break; + case X86::FP64_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m64; + break; + case X86::FP64_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m64; + break; + case X86::FP80_TO_INT16_IN_MEM: + Opc = X86::IST_Fp16m80; + break; + case X86::FP80_TO_INT32_IN_MEM: + Opc = X86::IST_Fp32m80; + break; + case X86::FP80_TO_INT64_IN_MEM: + Opc = X86::IST_Fp64m80; + break; } X86AddressMode AM = getAddressFromInstr(&MI, 0); @@ -30752,8 +31310,8 @@ .addReg(MI.getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(*BB, MI, DL, - TII->get(X86::FLDCW16m)), OrigCWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), + OrigCWFrameIdx); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -30861,10 +31419,8 @@ // X86 Optimization Hooks //===----------------------------------------------------------------------===// -bool -X86TargetLowering::targetShrinkDemandedConstant(SDValue Op, - const APInt &Demanded, - TargetLoweringOpt &TLO) const { +bool X86TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { // Only optimize Ands to prevent shrinking a constant that could be // matched by movzx. 
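The FP*_TO_INT*_IN_MEM expansion above exists because x87 stores round to nearest by default: the code spills the FPU control word with FNSTCW, ORs in 0xC00 (bits 10-11 are the rounding-control field, and 0b11 means truncate toward zero), reloads it with FLDCW around the IST_Fp* store, and finally restores the original word. The bit manipulation itself, as a sketch:

  #include <stdint.h>

  // cw stands for the 16-bit control word spilled by FNSTCW16m.
  static uint16_t with_round_toward_zero(uint16_t cw) {
    return (uint16_t)(cw | 0x0C00);  // RC field (bits 10-11) := 0b11
  }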
if (Op.getOpcode() != ISD::AND) @@ -30928,16 +31484,15 @@ unsigned BitWidth = Known.getBitWidth(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); - assert((Opc >= ISD::BUILTIN_OP_END || - Opc == ISD::INTRINSIC_WO_CHAIN || - Opc == ISD::INTRINSIC_W_CHAIN || - Opc == ISD::INTRINSIC_VOID) && + assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || + Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); Known.resetAll(); switch (Opc) { - default: break; + default: + break; case X86ISD::SETCC: Known.Zero.setBitsFrom(1); break; @@ -31032,11 +31587,11 @@ break; } case X86ISD::CMOV: { - Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1); + Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -31057,7 +31612,8 @@ unsigned NumElts = VT.getVectorNumElements(); if (Mask.size() == NumElts) { SmallVector DemandedOps(NumOps, APInt(NumElts, 0)); - Known.Zero.setAllBits(); Known.One.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) { if (!DemandedElts[i]) continue; @@ -31168,16 +31724,18 @@ case X86ISD::ANDNP: { unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (Tmp0 == 1) return 1; // Early out. + if (Tmp0 == 1) + return 1; // Early out. unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp0, Tmp1); } case X86ISD::CMOV: { - unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp0 == 1) return 1; // Early out. - unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); + unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp0 == 1) + return 1; // Early out. + unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1); return std::min(Tmp0, Tmp1); } } @@ -31225,8 +31783,8 @@ } if (Match) { unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize); - MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() : - MVT::getIntegerVT(MaskEltSize); + MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() + : MVT::getIntegerVT(MaskEltSize); SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize); if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) @@ -31480,7 +32038,8 @@ // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle. if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) || - ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) || + ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && + Subtarget.hasInt256()) || ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) { if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG, Subtarget)) { @@ -31791,8 +32350,9 @@ // Attempt to match against broadcast-from-vector. // Limit AVX1 to cases where we're loading+broadcasting a scalar element. 
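The X86ISD::CMOV case in computeKnownBitsForTargetNode above follows the usual rule for select-like nodes: a bit is known only if it is known, with the same value, in both possible results, which is what the two mask intersections express. A small worked example on 8-bit values:

  operand 1: 0b0000'11??   Zero = 0xF0, One = 0x0C
  operand 0: 0b0000'1??0   Zero = 0xF1, One = 0x08
  cmov:      Zero = 0xF0 & 0xF1 = 0xF0,  One = 0x0C & 0x08 = 0x08
             so the result is only known to look like 0b0000'1???.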
- if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) - && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) { + if ((Subtarget.hasAVX2() || + (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) && + (!IsEVEXShuffle || NumRootElts == NumMaskElts)) { SmallVector BroadcastMask(NumMaskElts, 0); if (isTargetShuffleEquivalent(Mask, BroadcastMask)) { if (V1.getValueType() == MaskVT && @@ -31855,9 +32415,9 @@ NewV1 = V1; // Save operands in case early exit happens. NewV2 = V2; - if (matchBinaryPermuteShuffle( - MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, - NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && + if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, + AllowIntDomain, NewV1, NewV2, DL, DAG, + Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! @@ -32431,10 +32991,9 @@ } // Ok, we have non-zero lanes, map them through to one of the Op's inputs. - unsigned OpMaskedIdx = - OpRatio == 1 - ? OpMask[OpIdx] - : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1)); + unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx] + : (OpMask[OpIdx] << OpRatioLog2) + + (RootMaskedIdx & (OpRatio - 1)); OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); int InputIdx = OpMask[OpIdx] / (int)OpMask.size(); @@ -32573,9 +33132,9 @@ /// We walk up the chain and look for a combinable shuffle, skipping over /// shuffles that we could hoist this shuffle's transformation past without /// altering anything. -static SDValue -combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, - SelectionDAG &DAG) { +static SDValue combineRedundantDWordShuffle(SDValue N, + MutableArrayRef Mask, + SelectionDAG &DAG) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); @@ -32970,8 +33529,7 @@ if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && - V.getOpcode() != N.getOpcode() && - V.hasOneUse()) { + V.getOpcode() != N.getOpcode() && V.hasOneUse()) { SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { SmallVector VMask = getPSHUFShuffleMask(V); @@ -33045,11 +33603,11 @@ /// operation. If true is returned then the operands of ADDSUB(SUBADD) operation /// are written to the parameters \p Opnd0 and \p Opnd1. /// -/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes -/// so it is easier to generically match. We also insert dummy vector shuffle -/// nodes for the operands which explicitly discard the lanes which are unused -/// by this operation to try to flow through the rest of the combiner -/// the fact that they're unused. +/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle +/// nodes so it is easier to generically match. We also insert dummy vector +/// shuffle nodes for the operands which explicitly discard the lanes which are +/// unused by this operation to try to flow through the rest of the combiner the +/// fact that they're unused. static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, bool &IsSubAdd) { @@ -33083,13 +33641,15 @@ // commute the FADD operands. 
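isAddSubOrSubAdd, described in the comment just above and implemented in the code that follows, matches the lane pattern behind x86's addsub instructions: every lane computes either a-b or a+b, alternating by lane parity, with both halves taken from the same two inputs. In scalar form the two shapes it distinguishes are roughly:

  // ADDSUB (e.g. addsubps): even lanes subtract, odd lanes add.
  static void addsub_ps(const float a[4], const float b[4], float out[4]) {
    for (int i = 0; i < 4; ++i)
      out[i] = (i & 1) ? a[i] + b[i] : a[i] - b[i];
  }

  // SUBADD: the mirrored form, matched when the even-parity source is the FADD.
  static void subadd_ps(const float a[4], const float b[4], float out[4]) {
    for (int i = 0; i < 4; ++i)
      out[i] = (i & 1) ? a[i] - b[i] : a[i] + b[i];
  }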
SDValue LHS, RHS; if (V1.getOpcode() == ISD::FSUB) { - LHS = V1->getOperand(0); RHS = V1->getOperand(1); + LHS = V1->getOperand(0); + RHS = V1->getOperand(1); if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) return false; } else { assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode"); - LHS = V2->getOperand(0); RHS = V2->getOperand(1); + LHS = V2->getOperand(0); + RHS = V2->getOperand(1); if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) && (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS)) return false; @@ -33101,8 +33661,8 @@ return false; // It's a subadd if the vector in the even parity is an FADD. - IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD - : V2->getOpcode() == ISD::FADD; + IsSubAdd = + Op0Even ? V1->getOpcode() == ISD::FADD : V2->getOpcode() == ISD::FADD; Opnd0 = LHS; Opnd1 = RHS; @@ -33270,8 +33830,8 @@ // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && - "Unexpected type for h-op"); + HOp.getValueType() == MVT::v4f64) && + HOp.getValueType() == VT && "Unexpected type for h-op"); return HOp; } @@ -33378,7 +33938,8 @@ TLI.isOperationLegal(Opcode, VT)) { bool CanFold = false; switch (Opcode) { - default : break; + default: + break; case ISD::ADD: case ISD::SUB: case ISD::MUL: @@ -33439,7 +34000,8 @@ // TODO - merge this into combineX86ShufflesRecursively. APInt KnownUndef, KnownZero; APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); - if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, + DCI)) return SDValue(N, 0); } @@ -33455,13 +34017,20 @@ switch (In.getOpcode()) { default: break; - case X86ISD::CVTP2SI: case X86ISD::CVTP2UI: - case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI: - case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI: - case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI: - case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: - case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P: - case X86ISD::VFPROUND: case X86ISD::VMFPROUND: + case X86ISD::CVTP2SI: + case X86ISD::CVTP2UI: + case X86ISD::MCVTP2SI: + case X86ISD::MCVTP2UI: + case X86ISD::CVTTP2SI: + case X86ISD::CVTTP2UI: + case X86ISD::MCVTTP2SI: + case X86ISD::MCVTTP2UI: + case X86ISD::CVTSI2P: + case X86ISD::CVTUI2P: + case X86ISD::MCVTSI2P: + case X86ISD::MCVTUI2P: + case X86ISD::VFPROUND: + case X86ISD::VMFPROUND: if (In.getOperand(0).getValueType() == MVT::v2f64 || In.getOperand(0).getValueType() == MVT::v2i64) return N->getOperand(0); // return the bitcast @@ -33480,18 +34049,18 @@ SDValue MULUDQ = BC.getOperand(0); ShuffleVectorSDNode *SVOp = cast(N); ArrayRef Mask = SVOp->getMask(); - if (BC.hasOneUse() && MULUDQ.hasOneUse() && - Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) { + if (BC.hasOneUse() && MULUDQ.hasOneUse() && Mask[0] == 0 && Mask[1] == 2 && + Mask[2] == -1 && Mask[3] == -1) { SDValue Op0 = MULUDQ.getOperand(0); SDValue Op1 = MULUDQ.getOperand(1); if (Op0.getOpcode() == ISD::BITCAST && Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op0.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp0 = - cast(Op0.getOperand(0)); + cast(Op0.getOperand(0)); ArrayRef Mask2 = SVOp0->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { + if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 
1 && + Mask2[3] == -1) { Op0 = SVOp0->getOperand(0); Op1 = DAG.getBitcast(MVT::v4i32, Op1); Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask); @@ -33502,10 +34071,10 @@ Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && Op1.getOperand(0).getValueType() == MVT::v4i32) { ShuffleVectorSDNode *SVOp1 = - cast(Op1.getOperand(0)); + cast(Op1.getOperand(0)); ArrayRef Mask2 = SVOp1->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { + if (Mask2[0] == 0 && Mask2[1] == -1 && Mask2[2] == 1 && + Mask2[3] == -1) { Op0 = DAG.getBitcast(MVT::v4i32, Op0); Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask); Op1 = SVOp1->getOperand(0); @@ -33741,8 +34310,7 @@ SDLoc DL(Op); SDValue Ext0 = extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); - SDValue ExtOp = - TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); + SDValue ExtOp = TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); SDValue UndefVec = TLO.DAG.getUNDEF(VT); SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); @@ -33881,7 +34449,7 @@ EVT VT = Op.getValueType(); unsigned BitWidth = OriginalDemandedBits.getBitWidth(); unsigned Opc = Op.getOpcode(); - switch(Opc) { + switch (Opc) { case X86ISD::PMULDQ: case X86ISD::PMULUDQ: { // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. @@ -34037,8 +34605,8 @@ return true; KnownBits KnownVec; - if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, - KnownVec, TLO, Depth + 1)) + if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, KnownVec, + TLO, Depth + 1)) return true; Known = KnownVec.zext(BitWidth, true); @@ -34146,8 +34714,9 @@ /// folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. -static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { +static SDValue +XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -34219,7 +34788,7 @@ LoadSDNode *LN0 = cast(LdNode); - if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); // If there's a bitcast before the shuffle, check if the load type and @@ -34238,8 +34807,8 @@ // Create shuffle node taking into account the case that its a unary shuffle SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1]; - Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, - ShuffleMask); + Shuffle = + DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, ShuffleMask); Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); @@ -34375,8 +34944,7 @@ // Convert a vXi1 constant build vector to the same width scalar integer. static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { EVT SrcVT = Op.getValueType(); - assert(SrcVT.getVectorElementType() == MVT::i1 && - "Expected a vXi1 vector"); + assert(SrcVT.getVectorElementType() == MVT::i1 && "Expected a vXi1 vector"); assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && "Expected a constant build vector"); @@ -34410,8 +34978,7 @@ return SDValue(); // Look for logic ops. 
- if (Op.getOpcode() != ISD::AND && - Op.getOpcode() != ISD::OR && + if (Op.getOpcode() != ISD::AND && Op.getOpcode() != ISD::OR && Op.getOpcode() != ISD::XOR) return SDValue(); @@ -34586,7 +35153,7 @@ bool LowUndef = true, AllUndefOrZero = true; for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { SDValue Op = N0.getOperand(i); - LowUndef &= Op.isUndef() || (i >= e/2); + LowUndef &= Op.isUndef() || (i >= e / 2); AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); } if (AllUndefOrZero) { @@ -34628,15 +35195,14 @@ // Try to remove a bitcast of constant vXi1 vector. We have to legalize // most of these to scalar anyway. - if (Subtarget.hasAVX512() && VT.isScalarInteger() && - SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && + if (Subtarget.hasAVX512() && VT.isScalarInteger() && SrcVT.isVector() && + SrcVT.getVectorElementType() == MVT::i1 && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { return combinevXi1ConstantToInteger(N0, DAG); } - if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && - VT.isVector() && VT.getVectorElementType() == MVT::i1 && - isa(N0)) { + if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() && + VT.getVectorElementType() == MVT::i1 && isa(N0)) { auto *C = cast(N0); if (C->isAllOnesValue()) return DAG.getConstant(1, SDLoc(N0), VT); @@ -34656,10 +35222,17 @@ // transferring the SSE operand to integer register and back. unsigned FPOpcode; switch (N0.getOpcode()) { - case ISD::AND: FPOpcode = X86ISD::FAND; break; - case ISD::OR: FPOpcode = X86ISD::FOR; break; - case ISD::XOR: FPOpcode = X86ISD::FXOR; break; - default: return SDValue(); + case ISD::AND: + FPOpcode = X86ISD::FAND; + break; + case ISD::OR: + FPOpcode = X86ISD::FOR; + break; + case ISD::XOR: + FPOpcode = X86ISD::FXOR; + break; + default: + return SDValue(); } if (!((Subtarget.hasSSE1() && VT == MVT::f32) || @@ -34735,7 +35308,7 @@ return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); }; MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64); - return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 }, + return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {SadOp0, SadOp1}, PSADBWBuilder); } @@ -34817,7 +35390,8 @@ DAG.getIntPtrConstant(0, DL)); } -// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK. +// Attempt to replace an all_of/any_of/parity style horizontal reduction with a +// MOVMSK. static SDValue combineHorizontalPredicateResult(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -34980,8 +35554,8 @@ // (extends the sign bit which is zero). // So it is correct to skip the sign/zero extend instruction. 
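combineHorizontalPredicateResult, declared above, rewrites all_of/any_of (and parity) reductions over vector compare results into a single MOVMSK plus a scalar test instead of a tree of vector operations. At the intrinsics level, the shape it is aiming for looks roughly like this (a sketch, not code from the patch):

  #include <emmintrin.h>

  // all_of / any_of over 16 byte-wide compare results.
  static int all_bytes_equal(__m128i a, __m128i b) {
    int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(a, b));  // one bit per lane
    return mask == 0xFFFF;                               // all_of
  }

  static int any_byte_equal(__m128i a, __m128i b) {
    return _mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) != 0; // any_of
  }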
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || - Root.getOpcode() == ISD::ZERO_EXTEND || - Root.getOpcode() == ISD::ANY_EXTEND)) + Root.getOpcode() == ISD::ZERO_EXTEND || + Root.getOpcode() == ISD::ANY_EXTEND)) Root = Root.getOperand(0); // If there was a match, we want Root to be a select that is the root of an @@ -35005,9 +35579,9 @@ if (Stages > 3) { unsigned SadElems = SadVT.getVectorNumElements(); - for(unsigned i = Stages - 3; i > 0; --i) { + for (unsigned i = Stages - 3; i > 0; --i) { SmallVector Mask(SadElems, -1); - for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) + for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) Mask[j] = MaskEnd + j; SDValue Shuffle = @@ -35178,15 +35752,15 @@ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Vec.getOperand(0).getValueType().getScalarType(), Vec.getOperand(0), Index); - SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - Vec.getOperand(1), Index); - SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - Vec.getOperand(2), Index); + SDValue Ext1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(1), Index); + SDValue Ext2 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(2), Index); return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2); } // TODO: This switch could include FNEG and the x86-specific FP logic ops - // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid + // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid // missed load folding and fma+fneg combining. switch (Vec.getOpcode()) { case ISD::FMA: // Begin 3 operands @@ -35330,8 +35904,7 @@ } return false; }; - if (all_of(InputVector->uses(), IsBoolExtract) && - BoolExtracts.size() > 1) { + if (all_of(InputVector->uses(), IsBoolExtract) && BoolExtracts.size() > 1) { unsigned NumSrcElts = SrcVT.getVectorNumElements(); EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts); if (SDValue BC = @@ -35526,11 +36099,10 @@ /// This function will also call SimplifyDemandedBits on already created /// BLENDV to perform additional simplifications. static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); - if ((N->getOpcode() != ISD::VSELECT && - N->getOpcode() != X86ISD::BLENDV) || + if ((N->getOpcode() != ISD::VSELECT && N->getOpcode() != X86ISD::BLENDV) || ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); @@ -35593,8 +36165,8 @@ if (U->getOpcode() == X86ISD::BLENDV) continue; - SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), - Cond, U->getOperand(1), U->getOperand(2)); + SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), Cond, + U->getOperand(1), U->getOperand(2)); DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB); DCI.AddToWorklist(U); } @@ -35645,7 +36217,8 @@ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { - default: break; + default: + break; case ISD::SETULT: // Converting this to a min would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive @@ -35710,11 +36283,12 @@ Opcode = X86ISD::FMAX; break; } - // Check for x CC y ? y : x -- a min/max with reversed arms. + // Check for x CC y ? y : x -- a min/max with reversed arms. 
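The FMIN/FMAX select matching above (continuing just below with the reversed-arm cases) is guarded the way it is because SSE's MINSS/MAXSS are not IEEE min/max: when either input is NaN, or when comparing +0.0 with -0.0, they simply return the second source operand. That is why several cases are only taken under no-NaNs, no-signed-zeros, or unsafe-math conditions. The scalar behaviour being matched is, roughly:

  // What minss computes: the second operand wins on NaN and on +0.0 vs -0.0.
  static float sse_minss(float a, float b) {
    return a < b ? a : b;   // comparison is false for NaN, so b is returned
  }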
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { - default: break; + default: + break; case ISD::SETOGE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would @@ -35877,12 +36451,13 @@ DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); switch (CC) { - default: break; + default: + break; case ISD::SETLT: case ISD::SETGT: { ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE; - Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), - Cond.getOperand(0), Cond.getOperand(1), NewCC); + Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), + Cond.getOperand(1), NewCC); return DAG.getSelect(DL, VT, Cond, LHS, RHS); } } @@ -36160,7 +36735,7 @@ SDValue Op2 = Cmp.getOperand(1); SDValue SetCC; - const ConstantSDNode* C = nullptr; + const ConstantSDNode *C = nullptr; bool needOppositeCond = (CC == X86::COND_E); bool checkAgainstTrue = false; // Is it a comparison against 1? @@ -36181,8 +36756,7 @@ bool truncatedToBoolWithAnd = false; // Skip (zext $x), (trunc $x), or (and $x, 1) node. while (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE || - SetCC.getOpcode() == ISD::AND) { + SetCC.getOpcode() == ISD::TRUNCATE || SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; if (isOneConstant(SetCC.getOperand(0))) @@ -36225,13 +36799,13 @@ if (!FVal) { SDValue Op = SetCC.getOperand(0); // Skip 'zext' or 'trunc' node. - if (Op.getOpcode() == ISD::ZERO_EXTEND || - Op.getOpcode() == ISD::TRUNCATE) + if (Op.getOpcode() == ISD::ZERO_EXTEND || Op.getOpcode() == ISD::TRUNCATE) Op = Op.getOperand(0); // A special case for rdrand/rdseed, where 0 is set if false cond is // found. if ((Op.getOpcode() != X86ISD::RDRAND && - Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0) + Op.getOpcode() != X86ISD::RDSEED) || + Op.getResNo() != 0) return SDValue(); } // Quit if false value is not the constant 0 or 1. @@ -36276,7 +36850,8 @@ SDValue SetCC0, SetCC1; switch (Cond->getOpcode()) { - default: return false; + default: + return false; case ISD::AND: case X86ISD::AND: isAnd = true; @@ -36307,12 +36882,12 @@ if (EFLAGS.getOpcode() == X86ISD::ADD) { if (isAllOnesConstant(EFLAGS.getOperand(1))) { SDValue Carry = EFLAGS.getOperand(0); - while (Carry.getOpcode() == ISD::TRUNCATE || - Carry.getOpcode() == ISD::ZERO_EXTEND || - Carry.getOpcode() == ISD::SIGN_EXTEND || - Carry.getOpcode() == ISD::ANY_EXTEND || - (Carry.getOpcode() == ISD::AND && - isOneConstant(Carry.getOperand(1)))) + while ( + Carry.getOpcode() == ISD::TRUNCATE || + Carry.getOpcode() == ISD::ZERO_EXTEND || + Carry.getOpcode() == ISD::SIGN_EXTEND || + Carry.getOpcode() == ISD::ANY_EXTEND || + (Carry.getOpcode() == ISD::AND && isOneConstant(Carry.getOperand(1)))) Carry = Carry.getOperand(0); if (Carry.getOpcode() == X86ISD::SETCC || Carry.getOpcode() == X86ISD::SETCC_CARRY) { @@ -36340,8 +36915,7 @@ } // If this is a check of the z flag of an add with 1, switch to the // C flag. 
- if (CarryCC == X86::COND_E && - CarryOp1.getOpcode() == X86ISD::ADD && + if (CarryCC == X86::COND_E && CarryOp1.getOpcode() == X86ISD::ADD && isOneConstant(CarryOp1.getOperand(1))) return CarryOp1; } @@ -36382,7 +36956,7 @@ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), - Flags}; + Flags}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36417,12 +36991,11 @@ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient // for any integer data type, including i8/i16. - if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { + if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) { Cond = getSETCC(CC, Cond, DL, DAG); // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, - FalseC->getValueType(0), Cond); + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; @@ -36431,31 +37004,33 @@ // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + Diff = (unsigned)Diff; bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { - default: break; - case 1: // result = add base, cond - case 2: // result = lea base( , cond*2) - case 3: // result = lea base(cond, cond*2) - case 4: // result = lea base( , cond*4) - case 5: // result = lea base(cond, cond*4) - case 8: // result = lea base( , cond*8) - case 9: // result = lea base(cond, cond*8) + default: + break; + case 1: // result = add base, cond + case 2: // result = lea base( , cond*2) + case 3: // result = lea base(cond, cond*2) + case 4: // result = lea base( , cond*4) + case 5: // result = lea base(cond, cond*4) + case 8: // result = lea base( , cond*8) + case 9: // result = lea base(cond, cond*8) isFastMultiplier = true; break; } } if (isFastMultiplier) { - APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); - Cond = getSETCC(CC, Cond, DL ,DAG); + APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); + Cond = getSETCC(CC, Cond, DL, DAG); // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), - Cond); + Cond = + DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. 
if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, @@ -36500,10 +37075,9 @@ std::swap(TrueOp, FalseOp); } - if (CC == X86::COND_E && - CmpAgainst == dyn_cast(TrueOp)) { - SDValue Ops[] = { FalseOp, Cond.getOperand(0), - DAG.getConstant(CC, DL, MVT::i8), Cond }; + if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { + SDValue Ops[] = {FalseOp, Cond.getOperand(0), + DAG.getConstant(CC, DL, MVT::i8), Cond}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36538,7 +37112,7 @@ } SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), - Flags}; + Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); @@ -36572,9 +37146,9 @@ EVT VT = N->getValueType(0); // This should constant fold. SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); - SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), - DAG.getConstant(X86::COND_NE, DL, MVT::i8), - Cond); + SDValue CMov = + DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), + DAG.getConstant(X86::COND_NE, DL, MVT::i8), Cond); return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); } } @@ -36890,8 +37464,7 @@ return SDValue(); APInt Mask17 = APInt::getHighBitsSet(32, 17); - if (!DAG.MaskedValueIsZero(N1, Mask17) || - !DAG.MaskedValueIsZero(N0, Mask17)) + if (!DAG.MaskedValueIsZero(N1, Mask17) || !DAG.MaskedValueIsZero(N0, Mask17)) return SDValue(); // Use SplitOpsAndApply to handle AVX splitting. @@ -36901,7 +37474,7 @@ return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); }; return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) }, + {DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1)}, PMADDWDBuilder); } @@ -36929,8 +37502,8 @@ ArrayRef Ops) { return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops); }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, - PMULDQBuilder, /*CheckBWI*/false); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1}, + PMULDQBuilder, /*CheckBWI*/ false); } // If the upper bits are zero we can use a single pmuludq. @@ -36940,8 +37513,8 @@ ArrayRef Ops) { return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops); }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, - PMULUDQBuilder, /*CheckBWI*/false); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1}, + PMULUDQBuilder, /*CheckBWI*/ false); } return SDValue(); @@ -36990,8 +37563,8 @@ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(AbsMulAmt, DL, VT)); if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); return NewMul; } @@ -37015,9 +37588,8 @@ (isPowerOf2_64(MulAmt2) || (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) { - if (isPowerOf2_64(MulAmt2) && - !(SignMulAmt >= 0 && N->hasOneUse() && - N->use_begin()->getOpcode() == ISD::ADD)) + if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::ADD)) // If second multiplifer is pow2, issue it first. We want the multiply by // 3, 5, or 9 to be folded into the addressing mode unless the lone use // is an add. 
Only do this for positive multiply amounts since the @@ -37040,8 +37612,8 @@ // Negate the result. if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (!Subtarget.slowLEA()) NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); @@ -37055,17 +37627,16 @@ NewMul = DAG.getNode( ISD::ADD, DL, VT, N->getOperand(0), DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, - MVT::i8))); + DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, MVT::i8))); // To negate, subtract the number from zero if (SignMulAmt < 0) - NewMul = DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), NewMul); + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); } else if (isPowerOf2_64(AbsMulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt + 1), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt + 1), DL, MVT::i8)); // To negate, reverse the operands of the subtract. if (SignMulAmt < 0) NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul); @@ -37073,16 +37644,16 @@ NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) { // (mul x, 2^N + 2) => (add (add (shl x, N), x), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt - 2), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) { // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x) - NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(AbsMulAmt + 2), - DL, MVT::i8)); + NewMul = + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(AbsMulAmt + 2), DL, MVT::i8)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); } @@ -37099,8 +37670,7 @@ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) // since the result of setcc_c is all zero's or all ones. - if (VT.isInteger() && !VT.isVector() && - N1C && N0.getOpcode() == ISD::AND && + if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = N0.getConstantOperandAPInt(1); @@ -37181,7 +37751,7 @@ if (SarConst.isNegative()) return SDValue(); - for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { + for (MVT SVT : {MVT::i8, MVT::i16, MVT::i32}) { unsigned ShiftSize = SVT.getSizeInBits(); // skipping types without corresponding sext/zext and // ShlConst that is not one of [56,48,32,24,16] @@ -37327,8 +37897,8 @@ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular // truncate to create a larger truncate. 
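The combineMul cases above strength-reduce multiplications by "almost a power of two" constants into a shift plus one or two adds/subs, negating at the end for negative amounts. Spelled out as plain arithmetic (a sketch of the algebra, not the DAG code):

  // mul x, 2^N + 1  ->  (x << N) + x         e.g. x * 9  == (x << 3) + x
  // mul x, 2^N - 1  ->  (x << N) - x         e.g. x * 7  == (x << 3) - x
  // mul x, 2^N + 2  ->  ((x << N) + x) + x   e.g. x * 10 == (x << 3) + x + x
  // mul x, 2^N - 2  ->  ((x << N) - x) - x   e.g. x * 6  == (x << 3) - x - x
  static unsigned long long mul_by_9(unsigned long long x) { return (x << 3) + x; }
  static unsigned long long mul_by_7(unsigned long long x) { return (x << 3) - x; }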
- if (Subtarget.hasAVX512() && - N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && + if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE && + N1.isUndef() && VT == MVT::v16i8 && N0.getOperand(0).getValueType() == MVT::v8i32) { if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) || (!IsSigned && @@ -37508,7 +38078,7 @@ SDValue CMP00 = CMP0->getOperand(0); SDValue CMP01 = CMP0->getOperand(1); - EVT VT = CMP00.getValueType(); + EVT VT = CMP00.getValueType(); if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; @@ -37530,8 +38100,10 @@ } if (!ExpectingFlags) { - enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); - enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); + enum X86::CondCode cc0 = + (enum X86::CondCode)N0.getConstantOperandVal(0); + enum X86::CondCode cc1 = + (enum X86::CondCode)N1.getConstantOperandVal(0); if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { X86::CondCode tmp = cc0; @@ -37539,7 +38111,7 @@ cc1 = tmp; } - if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || + if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). @@ -37549,17 +38121,17 @@ DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes - // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. + // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee + // that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, DAG.getConstant(0, DL, MVT::v16i1), FSetCC, DAG.getIntPtrConstant(0, DL)); return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } - SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, - CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, DL, - MVT::i8)); + SDValue OnesOrZeroesF = + DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, + CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -37570,8 +38142,8 @@ // OnesOrZeroesF is all ones of all zeroes, we don't need all the // bits, but can do this little dance to extract the lowest 32 bits // and work with those going forward. 
- SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, - OnesOrZeroesF); + SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, + MVT::v2f64, OnesOrZeroesF); SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); @@ -37581,8 +38153,8 @@ SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); - SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - ANDed); + SDValue OneBitOfTruth = + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } } @@ -37648,18 +38220,18 @@ assert((N->getOpcode() == ISD::ANY_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); + N->getOpcode() == ISD::SIGN_EXTEND) && + "Invalid Node"); SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow.getValueType(); - if (Narrow->getOpcode() != ISD::XOR && - Narrow->getOpcode() != ISD::AND && + if (Narrow->getOpcode() != ISD::XOR && Narrow->getOpcode() != ISD::AND && Narrow->getOpcode() != ISD::OR) return SDValue(); - SDValue N0 = Narrow->getOperand(0); - SDValue N1 = Narrow->getOperand(1); + SDValue N0 = Narrow->getOperand(0); + SDValue N1 = Narrow->getOperand(1); SDLoc DL(Narrow); // The Left side has to be a trunc. @@ -37671,10 +38243,9 @@ return SDValue(); // The right side has to be a 'trunc' or a constant vector. - bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getValueType() == VT; - if (!RHSTrunc && - !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) + bool RHSTrunc = + N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getValueType() == VT; + if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -37693,14 +38264,15 @@ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); + default: + llvm_unreachable("Unexpected opcode"); case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType()); case ISD::SIGN_EXTEND: - return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, - Op, DAG.getValueType(NarrowVT)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, + DAG.getValueType(NarrowVT)); } } @@ -37723,17 +38295,23 @@ EVT N10Type = N10.getValueType(); // Ensure that both types are the same and are legal scalar fp types. 
- if (N00Type != N10Type || - !((Subtarget.hasSSE1() && N00Type == MVT::f32) || - (Subtarget.hasSSE2() && N00Type == MVT::f64))) + if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) || + (Subtarget.hasSSE2() && N00Type == MVT::f64))) return SDValue(); unsigned FPOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected input node for FP logic conversion"); - case ISD::AND: FPOpcode = X86ISD::FAND; break; - case ISD::OR: FPOpcode = X86ISD::FOR; break; - case ISD::XOR: FPOpcode = X86ISD::FXOR; break; + default: + llvm_unreachable("Unexpected input node for FP logic conversion"); + case ISD::AND: + FPOpcode = X86ISD::FAND; + break; + case ISD::OR: + FPOpcode = X86ISD::FOR; + break; + case ISD::XOR: + FPOpcode = X86ISD::FXOR; + break; } SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); @@ -37793,15 +38371,17 @@ return SDValue(); return ShiftedIndex.getOperand(0); - } static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { if (Subtarget.hasBMI2() && VT.isScalarInteger()) { switch (VT.getSizeInBits()) { - default: return false; - case 64: return Subtarget.is64Bit() ? true : false; - case 32: return true; + default: + return false; + case 64: + return Subtarget.is64Bit() ? true : false; + case 32: + return true; } } return false; @@ -37831,7 +38411,7 @@ SDValue N = Node->getOperand(i); LoadSDNode *Ld = dyn_cast(N.getNode()); - // continue if the operand is not a load instruction + // continue if the operand is not a load instruction if (!Ld) return SDValue(); @@ -37927,9 +38507,9 @@ // If this is 64-bit, its always best to xor the two 32-bit pieces together // even if we have popcnt. if (VT == MVT::i64) { - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(32, DL, MVT::i8))); + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(32, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); // Generate a 32-bit parity idiom. This will bring us back here if we need @@ -37942,16 +38522,16 @@ assert(VT == MVT::i32 && "Unexpected VT!"); // Xor the high and low 16-bits together using a 32-bit operation. - SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(16, DL, MVT::i8)); + SDValue Hi16 = + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(16, DL, MVT::i8)); X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. // This should allow an h-reg to be used to save a shift. // FIXME: We only get an h-reg in 32-bit mode. 
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(8, DL, MVT::i8))); + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i8, + DAG.getNode(ISD::SRL, DL, VT, X, DAG.getConstant(8, DL, MVT::i8))); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); @@ -38603,15 +39183,25 @@ return SDValue(); switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); + default: + return SDValue(); case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break; - case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break; + case MVT::v4i32: + if (!Subtarget.hasSSE2()) + return SDValue(); + break; + case MVT::v2i64: + if (!Subtarget.hasSSE42()) + return SDValue(); + break; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: - case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break; + case MVT::v4i64: + if (!Subtarget.hasAVX2()) + return SDValue(); + break; } // There must be a shift right algebraic before the xor, and the xor must be a @@ -38799,8 +39389,7 @@ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); } if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !Subtarget.hasAVX512() && - (SVT == MVT::i8 || SVT == MVT::i16) && + !Subtarget.hasAVX512() && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). @@ -38835,8 +39424,8 @@ unsigned NumElems = VT.getVectorNumElements(); EVT ScalarVT = VT.getVectorElementType(); - if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && - NumElems >= 2 && isPowerOf2_32(NumElems))) + if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2 && + isPowerOf2_32(NumElems))) return SDValue(); // InScalarVT is the intermediate type in AVG pattern and it should be greater @@ -38907,7 +39496,7 @@ Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes); Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); return SplitOpsAndApply(DAG, Subtarget, DL, VT, - { Operands[0].getOperand(0), Operands[1] }, + {Operands[0].getOperand(0), Operands[1]}, AVGBuilder); } @@ -38976,13 +39565,13 @@ // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. 
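The parity idiom emitted above never needs a full popcount: XOR-folding preserves parity, so the value is folded 64 -> 32 -> 16 -> 8 bits and the last XOR is done as a flag-setting 8-bit operation, because x86's parity flag only reflects the low byte of a result (the DAG then reads it via SETNP). The same folding in portable C, with __builtin_parity standing in for the PF read:

  #include <stdint.h>

  static unsigned parity64(uint64_t x) {
    uint32_t lo = (uint32_t)x ^ (uint32_t)(x >> 32);  // i64: xor the halves
    lo ^= lo >> 16;                                   // fold 32 -> 16 bits
    uint8_t b = (uint8_t)(lo ^ (lo >> 8));            // final 8-bit xor
    return __builtin_parity(b);                       // SETNP on that xor
  }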
ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; - unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast))) { + *Ld->getMemOperand(), &Fast) && + !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -38990,19 +39579,17 @@ SDValue Ptr = Ld->getBasePtr(); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - NumElems/2); + NumElems / 2); SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Alignment, Ld->getMemOperand()->getFlags()); Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); - SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(16), - MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); + SDValue Load2 = DAG.getLoad( + HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Load1.getValue(1), - Load2.getValue(1)); + Load1.getValue(1), Load2.getValue(1)); SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); @@ -39135,8 +39722,8 @@ if (LoadFirstElt && LoadLastElt) { SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMemOperand()); - SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, - ML->getPassThru()); + SDValue Blend = + DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getPassThru()); return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true); } @@ -39154,12 +39741,11 @@ // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. - SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), - ML->getMask(), DAG.getUNDEF(VT), - ML->getMemoryVT(), ML->getMemOperand(), - ML->getExtensionType()); - SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, - ML->getPassThru()); + SDValue NewML = DAG.getMaskedLoad( + VT, DL, ML->getChain(), ML->getBasePtr(), ML->getMask(), DAG.getUNDEF(VT), + ML->getMemoryVT(), ML->getMemOperand(), ML->getExtensionType()); + SDValue Blend = + DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); return DCI.CombineTo(ML, Blend, NewML.getValue(1), true); } @@ -39195,15 +39781,15 @@ unsigned ToSz = VT.getScalarSizeInBits(); unsigned FromSz = LdVT.getScalarSizeInBits(); // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for extending masked load"); + assert(isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for extending masked load"); - unsigned SizeRatio = ToSz / FromSz; + unsigned SizeRatio = ToSz / FromSz; assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - LdVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), LdVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert PassThru value. 
@@ -39217,7 +39803,7 @@ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru, - DAG.getUNDEF(WideVecVT), ShuffleVec); + DAG.getUNDEF(WideVecVT), ShuffleVec); } // Prepare the new mask. @@ -39231,15 +39817,13 @@ ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) ShuffleVec[i] = NumElems * SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); + NewMask = DAG.getVectorShuffle( + WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; + unsigned WidenNumElts = NumElems * SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); @@ -39248,10 +39832,9 @@ NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } - SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), - Mld->getBasePtr(), NewMask, WidePassThru, - Mld->getMemoryVT(), Mld->getMemOperand(), - ISD::NON_EXTLOAD); + SDValue WideLd = DAG.getMaskedLoad( + WideVecVT, dl, Mld->getChain(), Mld->getBasePtr(), NewMask, WidePassThru, + Mld->getMemoryVT(), Mld->getMemOperand(), ISD::NON_EXTLOAD); SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd); SmallVector ShuffleVec(NumElems * SizeRatio, -1); @@ -39287,8 +39870,8 @@ SDLoc DL(MS); EVT VT = MS->getValue().getValueType(); EVT EltVT = VT.getVectorElementType(); - SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - MS->getValue(), VecIndex); + SDValue Extract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, MS->getValue(), VecIndex); // Store that element at the appropriate offset from the base pointer. return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(), @@ -39329,8 +39912,8 @@ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), Mst->getMemoryVT())) { return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); + Mst->getBasePtr(), Mask, Mst->getMemoryVT(), + Mst->getMemOperand(), true); } return SDValue(); @@ -39351,19 +39934,19 @@ return SDValue(); // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for truncating masked store"); + assert(isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for truncating masked store"); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. - assert (((NumElems * FromSz) % ToSz) == 0 && - "Unexpected ratio for truncating masked store"); + assert(((NumElems * FromSz) % ToSz) == 0 && + "Unexpected ratio for truncating masked store"); - unsigned SizeRatio = FromSz / ToSz; + unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. 
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); @@ -39376,9 +39959,8 @@ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); - SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); + SDValue TruncatedVal = DAG.getVectorShuffle( + WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), ShuffleVec); SDValue NewMask; SDValue Mask = Mst->getMask(); @@ -39387,17 +39969,15 @@ NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); + for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) + ShuffleVec[i] = NumElems * SizeRatio; + NewMask = DAG.getVectorShuffle( + WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), ShuffleVec); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; + unsigned WidenNumElts = NumElems * SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); @@ -39456,15 +40036,16 @@ // Turn vXi1 stores of constants into a scalar store. if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || - VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && + VT == MVT::v64i1) && + VT == StVT && TLI.isTypeLegal(VT) && ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) { // If its a v64i1 store without 64-bit support, we need two stores. if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { - SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl, - StoredVal->ops().slice(0, 32)); + SDValue Lo = + DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(0, 32)); Lo = combinevXi1ConstantToInteger(Lo, DAG); - SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl, - StoredVal->ops().slice(32, 32)); + SDValue Hi = + DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(32, 32)); Hi = combinevXi1ConstantToInteger(Hi, DAG); unsigned Alignment = St->getAlignment(); @@ -39475,11 +40056,9 @@ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); - SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Hi, Ptr1, - St->getPointerInfo().getWithOffset(4), - MinAlign(Alignment, 4U), - St->getMemOperand()->getFlags()); + SDValue Ch1 = DAG.getStore( + St->getChain(), dl, Hi, Ptr1, St->getPointerInfo().getWithOffset(4), + MinAlign(Alignment, 4U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -39492,11 +40071,9 @@ // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. 
bool Fast; - unsigned AddressSpace = St->getAddressSpace(); - unsigned Alignment = St->getAlignment(); if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - AddressSpace, Alignment, &Fast) && + *St->getMemOperand(), &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) @@ -39518,17 +40095,16 @@ St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, - TLI)) - return EmitTruncSStore(true /* Signed saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); + if (SDValue Val = detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), + Subtarget, TLI)) + return EmitTruncSStore(true /* Signed saturation */, St->getChain(), dl, + Val, St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand(), DAG); if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), DAG, dl, Subtarget, TLI)) return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); + dl, Val, St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand(), DAG); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -39543,18 +40119,20 @@ return SDValue(); // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); + if (!isPowerOf2_32(NumElems * FromSz * ToSz)) + return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromSz) % ToSz) return SDValue(); + if (0 != (NumElems * FromSz) % ToSz) + return SDValue(); - unsigned SizeRatio = FromSz / ToSz; + unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); @@ -39568,8 +40146,7 @@ return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); + DAG.getUNDEF(WideVecVT), ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. @@ -39586,18 +40163,19 @@ StoreType = MVT::f64; // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); + EVT StoreVecVT = + EVT::getVectorVT(*DAG.getContext(), StoreType, + VT.getSizeInBits() / StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector Chains; SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. 
- for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - StoreType, ShuffWide, - DAG.getIntPtrConstant(i, dl)); + for (unsigned i = 0, e = (ToSz * NumElems) / StoreType.getSizeInBits(); + i != e; ++i) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, + ShuffWide, DAG.getIntPtrConstant(i, dl)); SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); @@ -39644,8 +40222,9 @@ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget.is64Bit() || F64IsLegal) { - MVT LdVT = (Subtarget.is64Bit() && - (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64; + MVT LdVT = (Subtarget.is64Bit() && (!VT.isFloatingPoint() || !F64IsLegal)) + ? MVT::i64 + : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); @@ -40122,8 +40701,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // First instruction should be a right shift of a multiply. - if (Src.getOpcode() != ISD::SRL || - Src.getOperand(0).getOpcode() != ISD::MUL) + if (Src.getOpcode() != ISD::SRL || Src.getOperand(0).getOpcode() != ISD::MUL) return SDValue(); if (!Subtarget.hasSSE2()) @@ -40176,8 +40754,7 @@ // (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))), // (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B)))))))) static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - const SDLoc &DL) { + const X86Subtarget &Subtarget, const SDLoc &DL) { if (!VT.isVector() || !Subtarget.hasSSSE3()) return SDValue(); @@ -40273,8 +40850,8 @@ std::swap(IdxN01, IdxN11); } // N0 indices be the even element. N1 indices must be the next odd element. - if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || - IdxN01 != 2 * i || IdxN11 != 2 * i + 1) + if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || + IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); @@ -40285,8 +40862,8 @@ ZExtIn = N00In; SExtIn = N01In; } - if (ZExtIn != N00In || SExtIn != N01In || - ZExtIn != N10In || SExtIn != N11In) + if (ZExtIn != N00In || SExtIn != N01In || ZExtIn != N10In || + SExtIn != N11In) return SDValue(); } @@ -40295,14 +40872,13 @@ // Shrink by adding truncate nodes and let DAGCombine fold with the // sources. 
EVT InVT = Ops[0].getValueType(); - assert(InVT.getScalarType() == MVT::i8 && - "Unexpected scalar element type"); + assert(InVT.getScalarType() == MVT::i8 && "Unexpected scalar element type"); assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, InVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]); }; - return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, + return SplitOpsAndApply(DAG, Subtarget, DL, VT, {ZExtIn, SExtIn}, PMADDBuilder); } @@ -40451,22 +41027,38 @@ unsigned NewOpcode = 0; if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) { switch (Arg.getOpcode()) { - case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; - case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break; - // We can't handle scalar intrinsic node here because it would only - // invert one element and not the whole vector. But we could try to handle - // a negation of the lower element only. + case ISD::FMA: + NewOpcode = X86ISD::FNMSUB; + break; + case X86ISD::FMSUB: + NewOpcode = X86ISD::FNMADD; + break; + case X86ISD::FNMADD: + NewOpcode = X86ISD::FMSUB; + break; + case X86ISD::FNMSUB: + NewOpcode = ISD::FMA; + break; + case X86ISD::FMADD_RND: + NewOpcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FMSUB_RND: + NewOpcode = X86ISD::FNMADD_RND; + break; + case X86ISD::FNMADD_RND: + NewOpcode = X86ISD::FMSUB_RND; + break; + case X86ISD::FNMSUB_RND: + NewOpcode = X86ISD::FMADD_RND; + break; + // We can't handle scalar intrinsic node here because it would only + // invert one element and not the whole vector. But we could try to handle + // a negation of the lower element only. } } if (NewOpcode) - return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, - Arg.getNode()->ops())); + return DAG.getBitcast(OrigVT, + DAG.getNode(NewOpcode, DL, VT, Arg.getNode()->ops())); return SDValue(); } @@ -40488,17 +41080,25 @@ SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); unsigned IntOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected FP logic op"); - case X86ISD::FOR: IntOpcode = ISD::OR; break; - case X86ISD::FXOR: IntOpcode = ISD::XOR; break; - case X86ISD::FAND: IntOpcode = ISD::AND; break; - case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + default: + llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: + IntOpcode = ISD::OR; + break; + case X86ISD::FXOR: + IntOpcode = ISD::XOR; + break; + case X86ISD::FAND: + IntOpcode = ISD::AND; + break; + case X86ISD::FANDN: + IntOpcode = X86ISD::ANDNP; + break; } SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); return DAG.getBitcast(VT, IntOp); } - /// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val) static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() != ISD::XOR) @@ -40688,13 +41288,18 @@ // into FMINC and FMAXC, which are Commutative operations. 
unsigned NewOp = 0; switch (N->getOpcode()) { - default: llvm_unreachable("unknown opcode"); - case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; - case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; + default: + llvm_unreachable("unknown opcode"); + case X86ISD::FMIN: + NewOp = X86ISD::FMINC; + break; + case X86ISD::FMAX: + NewOp = X86ISD::FMAXC; + break; } - return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1)); + return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0), + N->getOperand(1)); } static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, @@ -40732,8 +41337,8 @@ if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); - EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - VT); + EVT SetCCType = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // There are 4 possibilities involving NaN inputs, and these are the required // outputs: @@ -40793,8 +41398,8 @@ // Turn ANDNP back to AND if input is inverted. if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) { - return DAG.getNode(ISD::AND, SDLoc(N), VT, - N->getOperand(0).getOperand(0), N->getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N->getOperand(0).getOperand(0), + N->getOperand(1)); } // Attempt to recursively combine a bitmask ANDNP with shuffles. @@ -40902,7 +41507,7 @@ //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND)) { + N0.getOpcode() == ISD::SIGN_EXTEND)) { SDValue N00 = N0.getOperand(0); // EXTLOAD has a better solution on AVX2, @@ -40912,8 +41517,8 @@ return SDValue(); if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { - SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, - N00, N1); + SDValue Tmp = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); } } @@ -41091,8 +41696,7 @@ // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); unsigned Scale = NumElts / EltSizeInBits; - EVT BroadcastVT = - EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); + EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); Vec = DAG.getBitcast(VT, Vec); @@ -41268,8 +41872,8 @@ // Only combine legal element types. EVT SVT = VT.getVectorElementType(); - if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && - SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) + if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && SVT != MVT::i64 && + SVT != MVT::f32 && SVT != MVT::f64) return SDValue(); // We can only do this if the vector size in 256 bits or less. 
@@ -41343,29 +41947,63 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { if (NegMul) { switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FNMADD; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMADD: Opcode = ISD::FMA; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; + default: + llvm_unreachable("Unexpected opcode"); + case ISD::FMA: + Opcode = X86ISD::FNMADD; + break; + case X86ISD::FMADD_RND: + Opcode = X86ISD::FNMADD_RND; + break; + case X86ISD::FMSUB: + Opcode = X86ISD::FNMSUB; + break; + case X86ISD::FMSUB_RND: + Opcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FNMADD: + Opcode = ISD::FMA; + break; + case X86ISD::FNMADD_RND: + Opcode = X86ISD::FMADD_RND; + break; + case X86ISD::FNMSUB: + Opcode = X86ISD::FMSUB; + break; + case X86ISD::FNMSUB_RND: + Opcode = X86ISD::FMSUB_RND; + break; } } if (NegAcc) { switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FMSUB; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FMSUB: Opcode = ISD::FMA; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + default: + llvm_unreachable("Unexpected opcode"); + case ISD::FMA: + Opcode = X86ISD::FMSUB; + break; + case X86ISD::FMADD_RND: + Opcode = X86ISD::FMSUB_RND; + break; + case X86ISD::FMSUB: + Opcode = ISD::FMA; + break; + case X86ISD::FMSUB_RND: + Opcode = X86ISD::FMADD_RND; + break; + case X86ISD::FNMADD: + Opcode = X86ISD::FNMSUB; + break; + case X86ISD::FNMADD_RND: + Opcode = X86ISD::FNMSUB_RND; + break; + case X86ISD::FNMSUB: + Opcode = X86ISD::FNMADD; + break; + case X86ISD::FNMSUB_RND: + Opcode = X86ISD::FNMADD_RND; + break; } } @@ -41437,11 +42075,20 @@ unsigned NewOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode!"); - case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break; - case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break; - case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break; - case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break; + default: + llvm_unreachable("Unexpected opcode!"); + case X86ISD::FMADDSUB: + NewOpcode = X86ISD::FMSUBADD; + break; + case X86ISD::FMADDSUB_RND: + NewOpcode = X86ISD::FMSUBADD_RND; + break; + case X86ISD::FMSUBADD: + NewOpcode = X86ISD::FMADDSUB; + break; + case X86ISD::FMSUBADD_RND: + NewOpcode = X86ISD::FMADDSUB_RND; + break; } if (N->getNumOperands() == 4) @@ -41462,8 +42109,7 @@ SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::AND && - N0.hasOneUse() && + if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { @@ -41476,8 +42122,7 @@ } } - if (N0.getOpcode() == ISD::TRUNCATE && - N0.hasOneUse() && + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == 
X86ISD::SETCC_CARRY) { @@ -41570,9 +42215,8 @@ if ((OpSize == 128 && Subtarget.hasSSE2()) || (OpSize == 256 && Subtarget.hasAVX2()) || (OpSize == 512 && Subtarget.useAVX512Regs())) { - EVT VecVT = OpSize == 512 ? MVT::v16i32 : - OpSize == 256 ? MVT::v32i8 : - MVT::v16i8; + EVT VecVT = + OpSize == 512 ? MVT::v16i32 : OpSize == 256 ? MVT::v32i8 : MVT::v16i8; EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT; SDValue Cmp; if (IsOrXorXorCCZero) { @@ -41602,8 +42246,8 @@ // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); - SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL, - MVT::i32); + SDValue FFFFs = + DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL, MVT::i32); return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); } @@ -41679,8 +42323,8 @@ VT.getVectorNumElements() > 4) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16)) { - SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, - N->getOperand(2)); + SDValue Setcc = + DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); } @@ -41769,8 +42413,9 @@ unsigned ScalarSize = Index.getScalarValueSizeInBits(); if (ScalarSize != 32 && ScalarSize != 64) { MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; - EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - Index.getValueType().getVectorNumElements()); + EVT IndexVT = + EVT::getVectorVT(*DAG.getContext(), EltVT, + Index.getValueType().getVectorNumElements()); Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); SmallVector NewOps(N->op_begin(), N->op_end()); NewOps[4] = Index; @@ -41990,7 +42635,7 @@ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { SDNode *User = *UI; X86::CondCode CC; @@ -42011,12 +42656,18 @@ } switch (CC) { - default: break; - case X86::COND_A: case X86::COND_AE: - case X86::COND_B: case X86::COND_BE: - case X86::COND_O: case X86::COND_NO: - case X86::COND_G: case X86::COND_GE: - case X86::COND_L: case X86::COND_LE: + default: + break; + case X86::COND_A: + case X86::COND_AE: + case X86::COND_B: + case X86::COND_BE: + case X86::COND_O: + case X86::COND_NO: + case X86::COND_G: + case X86::COND_GE: + case X86::COND_L: + case X86::COND_LE: return true; } } @@ -42028,7 +42679,7 @@ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { SDNode *User = *UI; unsigned CCOpNo; @@ -42036,10 +42687,18 @@ default: // Be conservative. return false; - case X86ISD::SETCC: CCOpNo = 0; break; - case X86ISD::SETCC_CARRY: CCOpNo = 0; break; - case X86ISD::BRCOND: CCOpNo = 2; break; - case X86ISD::CMOV: CCOpNo = 2; break; + case X86ISD::SETCC: + CCOpNo = 0; + break; + case X86ISD::SETCC_CARRY: + CCOpNo = 0; + break; + case X86ISD::BRCOND: + CCOpNo = 2; + break; + case X86ISD::CMOV: + CCOpNo = 2; + break; } X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); @@ -42097,7 +42756,8 @@ unsigned NewOpc; switch (Op.getOpcode()) { - default: return SDValue(); + default: + return SDValue(); case ISD::AND: // Skip and with constant. We have special handling for and with immediate // during isel to generate test instructions. 
@@ -42105,8 +42765,12 @@ return SDValue(); NewOpc = X86ISD::AND; break; - case ISD::OR: NewOpc = X86ISD::OR; break; - case ISD::XOR: NewOpc = X86ISD::XOR; break; + case ISD::OR: + NewOpc = X86ISD::OR; + break; + case ISD::XOR: + NewOpc = X86ISD::XOR; + break; case ISD::ADD: // If the carry or overflow flag is used, we can't truncate. if (needCarryOrOverflowFlag(SDValue(N, 0))) @@ -42176,9 +42840,8 @@ if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); + return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, N->getOperand(0), + N->getOperand(1), Flags); } // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry) @@ -42199,29 +42862,27 @@ // If the LHS and RHS of the ADC node are zero, then it can't overflow and // the result is either zero or one (depending on the input carry bit). // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. - if (X86::isZeroNode(N->getOperand(0)) && - X86::isZeroNode(N->getOperand(1)) && + if (X86::isZeroNode(N->getOperand(0)) && X86::isZeroNode(N->getOperand(1)) && // We don't have a good way to replace an EFLAGS use, so only do this when // dead right now. SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); - SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, - MVT::i8), - N->getOperand(2)), - DAG.getConstant(1, DL, VT)); + SDValue Res1 = + DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + N->getOperand(2)), + DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { MVT VT = N->getSimpleValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); - return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, - N->getOperand(0), N->getOperand(1), - Flags); + return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, N->getOperand(0), + N->getOperand(1), Flags); } return SDValue(); @@ -42315,9 +42976,9 @@ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && EFLAGS.getValueType().isInteger() && !isa(EFLAGS.getOperand(1))) { - SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), - EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewSub = + DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(IsSub ? 
X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, @@ -42411,8 +43072,8 @@ return SDValue(); SDLoc DL(N); - EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, - VT.getVectorNumElements()); + EVT ReducedVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements() / 2); @@ -42428,8 +43089,8 @@ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0)); SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1)); - SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, - PMADDWDBuilder); + SDValue Madd = + SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, {N0, N1}, PMADDWDBuilder); // Fill the rest of the output with 0 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, DAG.getConstant(0, DL, MAddVT)); @@ -42517,8 +43178,7 @@ Op0 = BuildPSADBW(SadOp0, SadOp1); // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::ABS && - detectZextAbsDiff(Op1, SadOp0, SadOp1)) { + if (Op1.getOpcode() == ISD::ABS && detectZextAbsDiff(Op1, SadOp0, SadOp1)) { Op1 = BuildPSADBW(SadOp0, SadOp1); } @@ -42655,8 +43315,7 @@ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1])); }; return SplitOpsAndApply(DAG, Subtarget, DL, VT, - { Mul.getOperand(0), Mul.getOperand(1) }, - PMADDBuilder); + {Mul.getOperand(0), Mul.getOperand(1)}, PMADDBuilder); } // Attempt to turn this pattern into PMADDWD. @@ -42742,8 +43401,8 @@ std::swap(IdxN01, IdxN11); } // N0 indices be the even element. N1 indices must be the next odd element. - if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || - IdxN01 != 2 * i || IdxN11 != 2 * i + 1) + if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || + IdxN11 != 2 * i + 1) return SDValue(); SDValue N00In = N00Elt.getOperand(0); SDValue N01In = N01Elt.getOperand(0); @@ -42776,8 +43435,7 @@ OpVT.getVectorNumElements() / 2); return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]); }; - return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 }, - PMADDBuilder); + return SplitOpsAndApply(DAG, Subtarget, DL, VT, {In0, In1}, PMADDBuilder); } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, @@ -42867,8 +43525,8 @@ // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, USUBSATBuilder); + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {SubusLHS, SubusRHS}, + USUBSATBuilder); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -42897,9 +43555,8 @@ SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); - SDValue Psubus = - SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); + SDValue Psubus = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, + {NewSubusLHS, NewSubusRHS}, USUBSATBuilder); // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. 
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); @@ -42920,8 +43577,7 @@ isa(Op1.getOperand(1))) { const APInt &XorC = Op1.getConstantOperandAPInt(1); EVT VT = Op0.getValueType(); - SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, - Op1.getOperand(0), + SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), DAG.getConstant(~XorC, SDLoc(Op1), VT)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor, DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT)); @@ -42990,11 +43646,9 @@ // If needed, look through bitcasts to get to the load. if (auto *FirstLd = dyn_cast(peekThroughBitcasts(Op0))) { bool Fast; - unsigned Alignment = FirstLd->getAlignment(); - unsigned AS = FirstLd->getAddressSpace(); const X86TargetLowering *TLI = Subtarget.getTargetLowering(); - if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, AS, - Alignment, &Fast) && + if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *FirstLd->getMemOperand(), &Fast) && Fast) { if (SDValue Ld = EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false)) @@ -43257,9 +43911,9 @@ SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && - TLI.isTypeLegal(WideVecVT) && - WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { - auto isConcatenatedNot = [] (SDValue V) { + TLI.isTypeLegal(WideVecVT) && WideVecVT.getSizeInBits() == 256 && + WideVec.getOpcode() == ISD::AND) { + auto isConcatenatedNot = [](SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) return false; @@ -43428,8 +44082,8 @@ // Combine (ext_invec (ext_invec X)) -> (ext_invec X) const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (In.getOpcode() == N->getOpcode() && - TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType())) + if (In.getOpcode() == N->getOpcode() && TLI.isTypeLegal(VT) && + TLI.isTypeLegal(In.getOperand(0).getValueType())) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0)); // Attempt to combine as a shuffle. 
@@ -43448,7 +44102,8 @@ DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { - default: break; + default: + break; case ISD::SCALAR_TO_VECTOR: return combineScalarToVector(N, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -43463,57 +44118,99 @@ return combineExtractSubvector(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: - case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget); - case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); - case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); - case X86ISD::CMP: return combineCMP(N, DAG); - case ISD::ADD: return combineAdd(N, DAG, Subtarget); - case ISD::SUB: return combineSub(N, DAG, Subtarget); + case X86ISD::BLENDV: + return combineSelect(N, DAG, DCI, Subtarget); + case ISD::BITCAST: + return combineBitcast(N, DAG, DCI, Subtarget); + case X86ISD::CMOV: + return combineCMov(N, DAG, DCI, Subtarget); + case X86ISD::CMP: + return combineCMP(N, DAG); + case ISD::ADD: + return combineAdd(N, DAG, Subtarget); + case ISD::SUB: + return combineSub(N, DAG, Subtarget); case X86ISD::ADD: - case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); - case X86ISD::SBB: return combineSBB(N, DAG); - case X86ISD::ADC: return combineADC(N, DAG, DCI); - case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); - case ISD::SHL: return combineShiftLeft(N, DAG); - case ISD::SRA: return combineShiftRightArithmetic(N, DAG); - case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI); - case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); - case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); - case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); - case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget); - case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); - case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); - case ISD::STORE: return combineStore(N, DAG, Subtarget); - case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); - case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); + case X86ISD::SUB: + return combineX86AddSub(N, DAG, DCI); + case X86ISD::SBB: + return combineSBB(N, DAG); + case X86ISD::ADC: + return combineADC(N, DAG, DCI); + case ISD::MUL: + return combineMul(N, DAG, DCI, Subtarget); + case ISD::SHL: + return combineShiftLeft(N, DAG); + case ISD::SRA: + return combineShiftRightArithmetic(N, DAG); + case ISD::SRL: + return combineShiftRightLogical(N, DAG, DCI); + case ISD::AND: + return combineAnd(N, DAG, DCI, Subtarget); + case ISD::OR: + return combineOr(N, DAG, DCI, Subtarget); + case ISD::XOR: + return combineXor(N, DAG, DCI, Subtarget); + case X86ISD::BEXTR: + return combineBEXTR(N, DAG, DCI, Subtarget); + case ISD::LOAD: + return combineLoad(N, DAG, DCI, Subtarget); + case ISD::MLOAD: + return combineMaskedLoad(N, DAG, DCI, Subtarget); + case ISD::STORE: + return combineStore(N, DAG, Subtarget); + case ISD::MSTORE: + return combineMaskedStore(N, DAG, DCI, Subtarget); + case ISD::SINT_TO_FP: + return combineSIntToFP(N, DAG, Subtarget); + case ISD::UINT_TO_FP: + return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: - case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); - case ISD::FNEG: return combineFneg(N, DAG, Subtarget); - case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); - case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); - case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); - case X86ISD::FANDN: 
return combineFAndn(N, DAG, Subtarget); + case ISD::FSUB: + return combineFaddFsub(N, DAG, Subtarget); + case ISD::FNEG: + return combineFneg(N, DAG, Subtarget); + case ISD::TRUNCATE: + return combineTruncate(N, DAG, Subtarget); + case X86ISD::ANDNP: + return combineAndnp(N, DAG, DCI, Subtarget); + case X86ISD::FAND: + return combineFAnd(N, DAG, Subtarget); + case X86ISD::FANDN: + return combineFAndn(N, DAG, Subtarget); case X86ISD::FXOR: - case X86ISD::FOR: return combineFOr(N, DAG, Subtarget); + case X86ISD::FOR: + return combineFOr(N, DAG, Subtarget); case X86ISD::FMIN: - case X86ISD::FMAX: return combineFMinFMax(N, DAG); + case X86ISD::FMAX: + return combineFMinFMax(N, DAG); case ISD::FMINNUM: - case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget); + case ISD::FMAXNUM: + return combineFMinNumFMaxNum(N, DAG, Subtarget); case X86ISD::CVTSI2P: - case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI); - case X86ISD::BT: return combineBT(N, DAG, DCI); + case X86ISD::CVTUI2P: + return combineX86INT_TO_FP(N, DAG, DCI); + case X86ISD::BT: + return combineBT(N, DAG, DCI); case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget); - case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); - case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); + case ISD::ZERO_EXTEND: + return combineZext(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND: + return combineSext(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: + return combineSignExtendInReg(N, DAG, Subtarget); case ISD::ANY_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget); - case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); - case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); - case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return combineExtInVec(N, DAG, Subtarget); + case ISD::SETCC: + return combineSetCC(N, DAG, Subtarget); + case X86ISD::SETCC: + return combineX86SetCC(N, DAG, Subtarget); + case X86ISD::BRCOND: + return combineBrCond(N, DAG, Subtarget); case X86ISD::PACKSS: - case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget); + case X86ISD::PACKUS: + return combineVectorPack(N, DAG, DCI, Subtarget); case X86ISD::VSHL: case X86ISD::VSRA: case X86ISD::VSRL: @@ -43523,8 +44220,9 @@ case X86ISD::VSRLI: return combineVectorShiftImm(N, DAG, DCI, Subtarget); case X86ISD::PINSRB: - case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); - case X86ISD::SHUFP: // Handle all target specific shuffles + case X86ISD::PINSRW: + return combineVectorInsert(N, DAG, DCI, Subtarget); + case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: case X86ISD::EXTRQI: case X86ISD::INSERTQI: @@ -43556,7 +44254,8 @@ case X86ISD::VPERM2X128: case X86ISD::SHUF128: case X86ISD::VZEXT_MOVL: - case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget); + case ISD::VECTOR_SHUFFLE: + return combineShuffle(N, DAG, DCI, Subtarget); case X86ISD::FMADD_RND: case X86ISD::FMSUB: case X86ISD::FMSUB_RND: @@ -43564,20 +44263,26 @@ case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case ISD::FMA: + return combineFMA(N, DAG, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: - case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); - case X86ISD::MOVMSK: return combineMOVMSK(N, 
DAG, DCI); + case X86ISD::FMSUBADD: + return combineFMADDSUB(N, DAG, Subtarget); + case X86ISD::MOVMSK: + return combineMOVMSK(N, DAG, DCI); case X86ISD::MGATHER: case X86ISD::MSCATTER: case ISD::MGATHER: - case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); + case ISD::MSCATTER: + return combineGatherScatter(N, DAG, DCI, Subtarget); case X86ISD::PCMPEQ: - case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); + case X86ISD::PCMPGT: + return combineVectorCompare(N, DAG, Subtarget); case X86ISD::PMULDQ: - case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI); + case X86ISD::PMULUDQ: + return combinePMULDQ(N, DAG, DCI); } return SDValue(); @@ -43629,7 +44334,7 @@ return true; } -SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, +SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, SelectionDAG &DAG) const { const Module *M = DAG.getMachineFunction().getMMI().getModule(); @@ -43683,7 +44388,8 @@ bool Commute = false; switch (Op.getOpcode()) { - default: return false; + default: + return false; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -43716,8 +44422,7 @@ ((Commute && !isa(N1)) || (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op)))) return false; - if (IsFoldableAtomicRMW(N0, Op) || - (Commute && IsFoldableAtomicRMW(N1, Op))) + if (IsFoldableAtomicRMW(N0, Op) || (Commute && IsFoldableAtomicRMW(N1, Op))) return false; } } @@ -43726,9 +44431,8 @@ return true; } -bool X86TargetLowering:: - isDesirableToCombineBuildVectorToShuffleTruncate( - ArrayRef ShuffleMask, EVT SrcVT, EVT TruncVT) const { +bool X86TargetLowering::isDesirableToCombineBuildVectorToShuffleTruncate( + ArrayRef ShuffleMask, EVT SrcVT, EVT TruncVT) const { assert(SrcVT.getVectorNumElements() == ShuffleMask.size() && "Element count mismatch"); @@ -43800,7 +44504,8 @@ SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { - default: return false; + default: + return false; case 1: // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical @@ -43847,9 +44552,9 @@ if (CI->getType()->isIntegerTy(64)) { InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); - if (Constraints.size() >= 2 && - Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && - Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + if (Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && + Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && + Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && matchAsm(AsmPieces[1], {"bswap", "%edx"}) && @@ -43936,8 +44641,7 @@ default: break; } - } - else if (Constraint.size() == 2) { + } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; @@ -43965,12 +44669,12 @@ /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight - X86TargetLowering::getSingleConstraintMatchWeight( +X86TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. 
+ // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); @@ -44004,36 +44708,37 @@ break; case 'Y': { unsigned Size = StringRef(constraint).size(); - // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y' + // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching + // 'Y' char NextChar = Size == 2 ? constraint[1] : 'i'; if (Size > 2) break; switch (NextChar) { - default: - return CW_Invalid; - // XMM0 - case 'z': - case '0': - if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) - return CW_SpecificReg; - return CW_Invalid; - // Conditional OpMask regs (AVX512) - case 'k': - if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) - return CW_Register; - return CW_Invalid; - // Any MMX reg - case 'm': - if (type->isX86_MMXTy() && Subtarget.hasMMX()) - return weight; + default: + return CW_Invalid; + // XMM0 + case 'z': + case '0': + if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) + return CW_SpecificReg; + return CW_Invalid; + // Conditional OpMask regs (AVX512) + case 'k': + if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) + return CW_Register; + return CW_Invalid; + // Any MMX reg + case 'm': + if (type->isX86_MMXTy() && Subtarget.hasMMX()) + return weight; + return CW_Invalid; + // Any SSE reg when ISA >= SSE2, same as 'Y' + case 'i': + case 't': + case '2': + if (!Subtarget.hasSSE2()) return CW_Invalid; - // Any SSE reg when ISA >= SSE2, same as 'Y' - case 'i': - case 't': - case '2': - if (!Subtarget.hasSSE2()) - return CW_Invalid; - break; + break; } // Fall through (handle "Y" constraint). LLVM_FALLTHROUGH; @@ -44114,8 +44819,7 @@ /// Try to replace an X constraint, which matches anything, with another that /// has more specific requirements based on the type of the corresponding /// operand. -const char *X86TargetLowering:: -LowerXConstraint(EVT ConstraintVT) const { +const char *X86TargetLowering::LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { @@ -44158,16 +44862,18 @@ /// If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector&Ops, + std::vector &Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints for now. - if (Constraint.length() > 1) return; + if (Constraint.length() > 1) + return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { - default: break; + default: + break; case 'I': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 31) { @@ -44241,8 +44947,8 @@ Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); break; } - // FIXME gcc accepts some relocatable values here too, but only in certain - // memory models; it's complicated. + // FIXME gcc accepts some relocatable values here too, but only in certain + // memory models; it's complicated. } return; } @@ -44265,8 +44971,8 @@ if (ConstantSDNode *CST = dyn_cast(Op)) { bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1; BooleanContent BCont = getBooleanContents(MVT::i64); - ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) - : ISD::SIGN_EXTEND; + ISD::NodeType ExtOpc = + IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND; int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? 
CST->getZExtValue() : CST->getSExtValue(); Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64); @@ -44339,7 +45045,8 @@ if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { - default: break; + default: + break; // 'A' means [ER]AX + [ER]DX. case 'A': if (Subtarget.is64Bit()) @@ -44367,7 +45074,7 @@ return std::make_pair(0U, &X86::VK64RegClass); } break; - case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. + case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. if (Subtarget.is64Bit()) { if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32RegClass); @@ -44381,7 +45088,7 @@ } LLVM_FALLTHROUGH; // 32-bit fallthrough - case 'Q': // Q_REGS + case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32_ABCDRegClass); if (VT == MVT::i16) @@ -44391,8 +45098,8 @@ if (VT == MVT::i64) return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; - case 'r': // GENERAL_REGS - case 'l': // INDEX_REGS + case 'r': // GENERAL_REGS + case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) @@ -44400,7 +45107,7 @@ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32RegClass); return std::make_pair(0U, &X86::GR64RegClass); - case 'R': // LEGACY_REGS + case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_NOREXRegClass); if (VT == MVT::i16) @@ -44408,7 +45115,7 @@ if (VT == MVT::i32 || !Subtarget.is64Bit()) return std::make_pair(0U, &X86::GR32_NOREXRegClass); return std::make_pair(0U, &X86::GR64_NOREXRegClass); - case 'f': // FP Stack registers. + case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) @@ -44416,19 +45123,23 @@ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP64RegClass); return std::make_pair(0U, &X86::RFP80RegClass); - case 'y': // MMX_REGS if MMX allowed. - if (!Subtarget.hasMMX()) break; + case 'y': // MMX_REGS if MMX allowed. + if (!Subtarget.hasMMX()) + break; return std::make_pair(0U, &X86::VR64RegClass); - case 'Y': // SSE_REGS if SSE2 allowed - if (!Subtarget.hasSSE2()) break; + case 'Y': // SSE_REGS if SSE2 allowed + if (!Subtarget.hasSSE2()) + break; LLVM_FALLTHROUGH; case 'v': - case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed - if (!Subtarget.hasSSE1()) break; + case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed + if (!Subtarget.hasSSE1()) + break; bool VConstraint = (Constraint[0] == 'v'); switch (VT.SimpleTy) { - default: break; + default: + break; // Scalar SSE types. case MVT::f32: case MVT::i32: @@ -44467,7 +45178,8 @@ case MVT::v16f32: case MVT::v16i32: case MVT::v8i64: - if (!Subtarget.hasAVX512()) break; + if (!Subtarget.hasAVX512()) + break; if (VConstraint) return std::make_pair(0U, &X86::VR512RegClass); return std::make_pair(0U, &X86::VR512_0_15RegClass); @@ -44483,11 +45195,13 @@ case '2': return getRegForInlineAsmConstraint(TRI, "Y", VT); case 'm': - if (!Subtarget.hasMMX()) break; + if (!Subtarget.hasMMX()) + break; return std::make_pair(0U, &X86::VR64RegClass); case 'z': case '0': - if (!Subtarget.hasSSE1()) break; + if (!Subtarget.hasSSE1()) + break; return std::make_pair(X86::XMM0, &X86::VR128RegClass); case 'k': // This register class doesn't allocate k0 for masked vector operation. 
@@ -44514,7 +45228,7 @@ // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. - std::pair Res; + std::pair Res; Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? @@ -44572,7 +45286,7 @@ // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) - return Res; // Correct type already, nothing to do. + return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should // return "eax". This should even work for things like getting 64bit integer @@ -44584,16 +45298,22 @@ // Therefore, use a helper method. if (isGRClass(*Class)) { unsigned Size = VT.getSizeInBits(); - if (Size == 1) Size = 8; + if (Size == 1) + Size = 8; unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { bool is64Bit = Subtarget.is64Bit(); const TargetRegisterClass *RC = - Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) - : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) - : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) - : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) - : nullptr; + Size == 8 + ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) + : Size == 16 + ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) + : Size == 32 + ? (is64Bit ? &X86::GR32RegClass + : &X86::GR32_NOREXRegClass) + : Size == 64 + ? (is64Bit ? &X86::GR64RegClass : nullptr) + : nullptr; if (Size == 64 && !is64Bit) { // Model GCC's behavior here and select a fixed pair of 32-bit // registers. Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -415,7 +415,8 @@ assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(), LD->getAddressSpace(), - LD->getAlignment())) + LD->getAlignment(), + LD->getMemOperand()->getFlags())) return SDValue(); auto &TD = DAG.getDataLayout(); @@ -497,7 +498,8 @@ assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(), ST->getAddressSpace(), - ST->getAlignment())) { + ST->getAlignment(), + ST->getMemOperand()->getFlags())) { return SDValue(); } unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( @@ -1792,11 +1794,11 @@ break; case ISD::STORE: { // Replace unaligned store of unaligned load with memmove. - StoreSDNode *ST = cast(N); + StoreSDNode *ST = cast(N); if (!DCI.isBeforeLegalize() || - allowsMisalignedMemoryAccesses(ST->getMemoryVT(), - ST->getAddressSpace(), - ST->getAlignment()) || + allowsMisalignedMemoryAccesses(ST->getMemoryVT(), ST->getAddressSpace(), + ST->getAlignment(), + ST->getMemOperand()->getFlags()) || ST->isVolatile() || ST->isIndexed()) { break; }
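
For readers skimming the X86 and XCore hunks above, the call-site pattern they converge on can be summarized in a small standalone sketch. This is illustrative only and not part of the patch: the helper names wideAccessIsAllowedAndFast and misalignedLoadIsOK are made up here, the surrounding TLI/DAG/load objects are assumed to exist as they do in the combines above, and the sketch assumes it is compiled inside LLVM with the usual CodeGen headers available.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Pattern used by the X86 load/store-splitting combines above: hand the whole
// MachineMemOperand to allowsMemoryAccess() instead of passing the address
// space and alignment separately; the operand also carries the access flags.
static bool wideAccessIsAllowedAndFast(const TargetLowering &TLI,
                                       SelectionDAG &DAG, LoadSDNode *Ld,
                                       EVT VT) {
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, *Ld->getMemOperand(), &Fast);
  return Allowed && Fast;
}

// Pattern used by the XCore lowering and combine paths above, which keep the
// explicit argument list but now forward the memory operand's flags as well.
static bool misalignedLoadIsOK(const TargetLowering &TLI, LoadSDNode *LD) {
  return TLI.allowsMisalignedMemoryAccesses(
      LD->getMemoryVT(), LD->getAddressSpace(), LD->getAlignment(),
      LD->getMemOperand()->getFlags());
}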
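
A brief note on the design choice the updated call sites suggest: where a memory operand is available, passing the whole MachineMemOperand keeps the address space, alignment and flags (volatile, non-temporal and so on) together, so a combine cannot silently drop the flags the way the old (address space, alignment) argument pairs could; where only individual fields are at hand, as in the XCore paths, the flags are threaded through explicitly alongside the existing arguments.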