diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -442,6 +442,15 @@
          isIntImmediate(N->getOperand(1).getNode(), Imm);
 }
 
+// isIntImmediateEq - This method tests to see if N is a constant operand that
+// is equal to 'ImmExpected'.
+static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
+  uint64_t Imm;
+  if (!isIntImmediate(N.getNode(), Imm))
+    return false;
+  return Imm == ImmExpected;
+}
+
 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
   switch(ConstraintID) {
@@ -2591,6 +2600,40 @@
   return true;
 }
 
+// For node (shl (and val, mask), ShlImm), returns true if the node is
+// equivalent to a UBFIZ.
+static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
+                                              SDValue &Src, int &DstLSB,
+                                              int &Width) {
+  // The caller should have verified that Op is a left shift by the constant
+  // amount ShlImm; the asserts below check that.
+  assert(Op.getOpcode() == ISD::SHL &&
+         "Op.getNode() should be a SHL node to call this function");
+  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
+         "Op.getNode() should shift by ShlImm to call this function");
+
+  uint64_t AndImm = 0;
+  SDValue Op0 = Op.getOperand(0);
+  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
+    return false;
+
+  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
+  if (isMask_64(ShiftedAndImm)) {
+    // AndImm must end with a contiguous run of ones (the mask); its top ShlImm
+    // bits may be arbitrary, since the SHL shifts those bits out of the result
+    // anyway.
+    //
+    // For example, xyz11111 (with x, y, z each 0 or 1) is fine if ShlImm >= 3;
+    // the bits of the AND result that correspond to x, y and z are shifted
+    // out, so there is no need to extract them.
+    Width = countTrailingOnes(ShiftedAndImm);
+    DstLSB = ShlImm;
+    Src = Op0.getOperand(0);
+    return true;
+  }
+  return false;
+}
+
 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                            bool BiggerPattern,
                                            const uint64_t NonZeroBits,
@@ -2609,6 +2652,9 @@
   if (!BiggerPattern && !Op.hasOneUse())
     return false;
 
+  if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
+    return true;
+
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -9114,13 +9114,12 @@
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x7
+; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    bfi x9, x8, #1, #3
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldrh w0, [x9]
+; CHECK-NEXT:    ldrh w0, [x8]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
   %lv = load <8 x i16>, <8 x i16>* %A
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1222,12 +1222,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    and x8, x0, #0x7
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    bfi x9, x8, #1, #3
-; CHECK-NEXT:    ldr h1, [x9]
+; CHECK-NEXT:    bfi x8, x0, #1, #3
+; CHECK-NEXT:    ldr h1, [x8]
 ; CHECK-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-NEXT:    mov v1.h[2], v0.h[2]
 ; CHECK-NEXT:    mov v1.h[3], v0.h[3]
@@ -1250,11 +1249,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    add x8, sp, #8
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    and x8, x0, #0x3
-; CHECK-NEXT:    add x9, sp, #8
-; CHECK-NEXT:    bfi x9, x8, #1, #2
-; CHECK-NEXT:    str h0, [x9]
+; CHECK-NEXT:    bfi x8, x0, #1, #2
+; CHECK-NEXT:    str h0, [x8]
 ; CHECK-NEXT:    ldr d1, [sp, #8]
 ; CHECK-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-NEXT:    mov v1.h[2], v0.h[2]
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -6,13 +6,12 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    fmov.2d v0, #2.00000000
-; CHECK-NEXT:    bfi x9, x8, #2, #2
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldr s0, [x9]
+; CHECK-NEXT:    ldr s0, [x8]
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
@@ -27,13 +26,12 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    movi.16b v0, #63
-; CHECK-NEXT:    bfi x9, x8, #2, #2
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldr s0, [x9]
+; CHECK-NEXT:    ldr s0, [x8]
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll
--- a/llvm/test/CodeGen/AArch64/pr58350.ll
+++ b/llvm/test/CodeGen/AArch64/pr58350.ll
@@ -11,13 +11,12 @@
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    and x9, x0, #0x1
-; CHECK-NEXT:    mov x10, sp
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    bfi x9, x0, #2, #1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    bfi x10, x9, #2, #1
 ; CHECK-NEXT:    str d1, [sp]
-; CHECK-NEXT:    ldr s1, [x10]
+; CHECK-NEXT:    ldr s1, [x9]
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    str d1, [sp, #8]
 ; CHECK-NEXT:    add sp, sp, #16
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -250,19 +250,17 @@
 ; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[2]
-; CHECK-NEXT:    umov w10, v0.h[0]
+; CHECK-NEXT:    umov w9, v0.h[0]
+; CHECK-NEXT:    umov w10, v0.h[2]
 ; CHECK-NEXT:    umov w11, v0.h[3]
 ; CHECK-NEXT:    and v1.8b, v0.8b, v2.8b
 ; CHECK-NEXT:    cmeq v0.4h, v1.4h, v0.4h
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
+; CHECK-NEXT:    bfi w9, w8, #1, #1
+; CHECK-NEXT:    bfi w9, w10, #2, #1
 ; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    bfi w9, w11, #3, #29
+; CHECK-NEXT:    and w8, w9, #0xf
 ; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    bfi w10, w8, #1, #1
-; CHECK-NEXT:    bfi w10, w9, #2, #1
-; CHECK-NEXT:    bfi w10, w11, #3, #29
-; CHECK-NEXT:    and w8, w10, #0xf
 ; CHECK-NEXT:    strb w8, [x0]
 ; CHECK-NEXT:    ret
   %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -300,15 +300,13 @@
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    and v1.8b, v2.8b, v1.8b
 ; CHECK-NEXT:    umov w8, v1.h[1]
-; CHECK-NEXT:    umov w9, v1.h[2]
-; CHECK-NEXT:    umov w10, v1.h[0]
+; CHECK-NEXT:    umov w9, v1.h[0]
+; CHECK-NEXT:    umov w10, v1.h[2]
 ; CHECK-NEXT:    umov w11, v1.h[3]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    bfi w10, w8, #1, #1
-; CHECK-NEXT:    bfi w10, w9, #2, #1
-; CHECK-NEXT:    bfi w10, w11, #3, #29
-; CHECK-NEXT:    and w8, w10, #0xf
+; CHECK-NEXT:    bfi w9, w8, #1, #1
+; CHECK-NEXT:    bfi w9, w10, #2, #1
+; CHECK-NEXT:    bfi w9, w11, #3, #29
+; CHECK-NEXT:    and w8, w9, #0xf
 ; CHECK-NEXT:    strb w8, [x0]
 ; CHECK-NEXT:    ret
   %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
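
Note (editor's illustration, not part of the patch): the new helper only has to
verify that, once the top ShlImm bits of AndImm are discarded (the SHL shifts
them out anyway), the remaining bits form a contiguous run of trailing ones;
the run length becomes the UBFIZ width and ShlImm becomes the destination LSB,
which is what lets the tests above fold the AND straight into a bfi. Below is a
minimal standalone C++ sketch of that arithmetic; isMaskU64,
countTrailingOnesU64 and widthOfShlAndPattern are illustrative names, not LLVM
APIs.

#include <cstdint>
#include <cstdio>

// True if V is a non-empty run of trailing ones (0b0...01...1).
static bool isMaskU64(uint64_t V) { return V != 0 && ((V + 1) & V) == 0; }

// Number of consecutive one bits starting at bit 0.
static int countTrailingOnesU64(uint64_t V) {
  int N = 0;
  while (V & 1) {
    ++N;
    V >>= 1;
  }
  return N;
}

// For (shl (and x, AndImm), ShlImm): if the pattern merely repositions the low
// bits of x, report the UBFIZ-style DstLSB/Width and return true.
static bool widthOfShlAndPattern(uint64_t AndImm, unsigned ShlImm, int &DstLSB,
                                 int &Width) {
  // Drop the top ShlImm bits of AndImm; the SHL shifts them out of the result.
  const uint64_t ShiftedAndImm = (AndImm << ShlImm) >> ShlImm;
  if (!isMaskU64(ShiftedAndImm))
    return false;
  DstLSB = static_cast<int>(ShlImm);
  Width = countTrailingOnesU64(ShiftedAndImm);
  return true;
}

int main() {
  int DstLSB = 0, Width = 0;
  // (shl (and x, 0x7), 1) repositions the low 3 bits of x to start at bit 1,
  // i.e. UBFIZ x, #1, #3; prints "lsb=1 width=3".
  if (widthOfShlAndPattern(0x7, 1, DstLSB, Width))
    std::printf("lsb=%d width=%d\n", DstLSB, Width);
  // 0x5 is not a run of trailing ones, so the pattern is rejected.
  if (!widthOfShlAndPattern(0x5, 1, DstLSB, Width))
    std::printf("0x5 rejected\n");
  return 0;
}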