diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17135,14 +17135,22 @@ if (resolveBuildVector(BVN, DefBits, UndefBits)) { SDValue NewOp; - DefBits = ~DefBits; + // Any bits known to already be 0 need not be cleared again, which can help + // reduce the size of the immediate to one supported by the instruction. + KnownBits Known = DAG.computeKnownBits(LHS); + APInt ZeroSplat(VT.getSizeInBits(), 0); + for (unsigned I = 0; I < VT.getSizeInBits() / Known.Zero.getBitWidth(); I++) + ZeroSplat |= Known.Zero.zext(VT.getSizeInBits()) + << (Known.Zero.getBitWidth() * I); + + DefBits = ~(DefBits | ZeroSplat); if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG, DefBits, &LHS)) || (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG, DefBits, &LHS))) return NewOp; - UndefBits = ~UndefBits; + UndefBits = ~(UndefBits | ZeroSplat); if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG, UndefBits, &LHS)) || (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG, diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1483,9 +1483,8 @@ define <8 x i16> @bic_shifted_knownbits(<8 x i16> %v) { ; CHECK-LABEL: bic_shifted_knownbits: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: ushr v0.8h, v0.8h, #9 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.8h, #126 ; CHECK-NEXT: ret entry: %vshr_n = lshr <8 x i16> %v, @@ -1496,12 +1495,10 @@ define <8 x i32> @bic_shifted_knownbits2(<8 x i16> %v) { ; CHECK-LABEL: bic_shifted_knownbits2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1048321 // =0xfff000ff ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: bic v1.4s, #255, lsl #8 +; CHECK-NEXT: bic v0.4s, #255, lsl #8 ; CHECK-NEXT: ret entry: %vshr_n = zext <8 x i16> %v to <8 x i32> @@ -1525,11 +1522,10 @@ define <8 x i32> @bic_shifted_knownbits4(<8 x i32> %v) { ; CHECK-LABEL: bic_shifted_knownbits4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0xffff0000ffff0000 ; CHECK-NEXT: shl v0.4s, v0.4s, #8 ; CHECK-NEXT: shl v1.4s, v1.4s, #8 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-NEXT: bic v0.4s, #255, lsl #8 +; CHECK-NEXT: bic v1.4s, #255, lsl #8 ; CHECK-NEXT: ret entry: %vshr_n = shl <8 x i32> %v, diff --git a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll --- a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll +++ b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll @@ -157,7 +157,7 @@ ; CHECK-LABEL: shiftedreg_from_and_negative_type: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #2 -; CHECK-NEXT: bic v0.2s, #31 +; CHECK-NEXT: bic v0.2s, #28 ; CHECK-NEXT: sub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret %shl = shl <2 x i32> %a, diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -218,7 +218,7 @@ ; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: bic v1.4s, #255, lsl #24 +; CHECK-NEXT: bic v1.4s, #1, lsl #24 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 ; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s @@ -251,12 +251,13 @@ ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: shl v1.4h, v0.4h, #15 -; CHECK-NEXT: and v2.8b, v0.8b, v2.8b -; CHECK-NEXT: cmeq v0.4h, v2.4h, v0.4h -; CHECK-NEXT: cmlt v1.4h, v1.4h, #0 -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: fmov d1, d0 +; CHECK-NEXT: shl v2.4h, v0.4h, #15 +; CHECK-NEXT: bic v1.4h, #2 +; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h +; CHECK-NEXT: cmlt v1.4h, v2.4h, #0 ; CHECK-NEXT: and v1.8b, v1.8b, v3.8b +; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: addv h1, v1.4h ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: fmov w8, s1