Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -558,6 +558,9 @@
   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
 
+  MachineBasicBlock *EmitExpandedAND(MachineInstr &MI, MachineBasicBlock *BB,
+                                     unsigned RegSize) const;
+
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *MBB) const override;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2256,6 +2256,95 @@
   return BB;
 }
 
+MachineBasicBlock *
+AArch64TargetLowering::EmitExpandedAND(MachineInstr &MI, MachineBasicBlock *BB,
+                                       unsigned RegSize) const {
+  // Try the following transformation.
+  //
+  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
+  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+  //
+  // The mov pseudo instruction may be expanded into multiple mov instructions
+  // later. Instead, split the constant operand of the mov into two bitmask
+  // immediates, so that only two AND instructions are emitted instead of
+  // multiple mov + and instructions.
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  MachineInstr *DefMI = MRI.getUniqueVRegDef(MI.getOperand(2).getReg());
+  unsigned MovOpc = (RegSize == 32) ? AArch64::MOVi32imm : AArch64::MOVi64imm;
+  if (!DefMI || DefMI->getOpcode() != MovOpc)
+    return BB;
+
+  // If the constant is already a valid bitmask immediate, instruction
+  // selection uses a single ANDri and there is nothing to split.
+  uint64_t RegMask = (RegSize == 64) ? ~0ULL : 0xFFFFFFFFULL;
+  uint64_t OrgImm = DefMI->getOperand(1).getImm() & RegMask;
+  if (AArch64_AM::isLogicalImmediate(OrgImm, RegSize))
+    return BB;
+
+  uint64_t Mask = 0xFFFFULL;
+  for (unsigned i = 0; i < 4; i++, Mask <<= 16) {
+    // The immediate fits in one 16-bit chunk, so a single MOVZ materializes
+    // it and splitting is not profitable.
+    if ((OrgImm & Mask) == OrgImm)
+      return BB;
+  }
+
+  uint64_t OrgNImm = ~OrgImm & RegMask;
+  Mask = 0xFFFFULL;
+  for (unsigned i = 0; i < 4; i++, Mask <<= 16) {
+    // Likewise, the complement fits in one 16-bit chunk, so a single MOVN
+    // materializes the constant.
+    if ((OrgNImm & Mask) == OrgNImm)
+      return BB;
+  }
+
+  // A bitmask immediate consists of consecutive ones. Say there is a constant
+  // 0b00000000001000000000010000000000 which does not consist of consecutive
+  // ones. We can split it into two bitmask immediates like
+  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
+  // ANDing with both of these immediates reproduces the original constant.
+  unsigned LowestBitSet = countTrailingZeros(OrgImm);
+  unsigned HighestBitSet = Log2_64(OrgImm);
+
+  // Create a mask that is filled with ones from the position of the lowest
+  // set bit to the position of the highest set bit. Unsigned wrap-around
+  // keeps this correct even when HighestBitSet is the top bit.
+  uint64_t NewImm1 =
+      ((2ULL << HighestBitSet) - (1ULL << LowestBitSet)) & RegMask;
+  // Create a mask that is filled with ones outside the positions of the
+  // lowest and the highest set bits.
+  uint64_t NewImm2 = (OrgImm | ~NewImm1) & RegMask;
+
+  // NewImm1 is a single run of ones, so it is always a valid bitmask
+  // immediate unless it covers the whole register, in which case NewImm2
+  // equals OrgImm and the check below fails. If NewImm2 is not a valid
+  // bitmask immediate, do not split this node.
+  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
+    return BB;
+
+  // Create the two AND instructions with the split bitmask immediates.
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register NewTmpReg = MRI.createVirtualRegister(MRI.getRegClass(DstReg));
+  uint64_t NewImm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+  uint64_t NewImm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
+
+  BuildMI(*BB, MI, DL, TII->get(Opcode), NewTmpReg)
+      .addReg(SrcReg)
+      .addImm(NewImm1Enc);
+
+  BuildMI(*BB, MI, DL, TII->get(Opcode), DstReg)
+      .addReg(NewTmpReg)
+      .addImm(NewImm2Enc);
+
+  MI.eraseFromParent();
+
+  return BB;
+}
+
 MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   switch (MI.getOpcode()) {
@@ -2275,6 +2364,10 @@
 
   case AArch64::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
+  case AArch64::ANDWrr:
+    return EmitExpandedAND(MI, BB, 32);
+  case AArch64::ANDXrr:
+    return EmitExpandedAND(MI, BB, 64);
   }
 }
 
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2916,7 +2916,9 @@
 // Split from LogicalImm as not all instructions have both.
 multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
                       SDPatternOperator OpNode> {
-  let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+  let isReMaterializable = 1, isAsCheapAsAMove = 1,
+      // Try to expand AND after instruction selection.
+      usesCustomInserter = !eq(mnemonic, "and") in {
   def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
   def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
   }
Index: llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i8 @test(i32 %a) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w8, w0, #0x3ffc00
+; CHECK-NEXT:    and w8, w8, #0xffe007ff
+; CHECK-NEXT:    cmp w8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %a, 2098176
+  %cmp = icmp eq i32 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -245,10 +245,9 @@
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-LABEL: n0_badconstmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #256
-; CHECK-NEXT:    movk w9, #65280, lsl #16
+; CHECK-NEXT:    and w9, w1, #0xffffff00
 ; CHECK-NEXT:    and w8, w0, #0xffff00
-; CHECK-NEXT:    and w9, w1, w9
+; CHECK-NEXT:    and w9, w9, #0xff0001ff
 ; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mx = and i32 %x, 16776960
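
For reference, the splitting arithmetic can be checked in isolation. Below is a minimal standalone C++ sketch, not part of the patch: the helper name splitAndImm32 is made up, and the GCC/Clang builtins stand in for LLVM's countTrailingZeros/Log2_64. It reproduces the NewImm1/NewImm2 computation for the nonzero 32-bit constant 0x200400 (2098176) exercised by the new test:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative reimplementation of the split performed by EmitExpandedAND
// for a nonzero 32-bit immediate. Imm1 is a run of ones covering the bit
// range [LowestBitSet, HighestBitSet]; Imm2 has ones everywhere outside that
// range plus the original bits inside it.
static void splitAndImm32(uint32_t Imm, uint32_t &Imm1, uint32_t &Imm2) {
  unsigned LowestBitSet = __builtin_ctz(Imm);       // lowest set bit
  unsigned HighestBitSet = 31 - __builtin_clz(Imm); // highest set bit
  Imm1 = (uint32_t)((2ULL << HighestBitSet) - (1ULL << LowestBitSet));
  Imm2 = Imm | ~Imm1;
}

int main() {
  uint32_t Imm1, Imm2;
  splitAndImm32(0x200400, Imm1, Imm2);
  // ANDing with both masks reproduces the original immediate.
  assert((Imm1 & Imm2) == 0x200400);
  std::printf("0x%x 0x%x\n", Imm1, Imm2); // prints 0x3ffc00 0xffe007ff
  return 0;
}

These are exactly the #0x3ffc00 and #0xffe007ff immediates in the CHECK lines above. Whether the split actually fires additionally depends on the isLogicalImmediate check on NewImm2, which this sketch omits.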