Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1941,6 +1941,171 @@
 def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, logical_imm64:$imm), 0>;
 
+// If the constant operand of an AND (immediate) is not a valid bitmask
+// immediate, try to split it into two valid bitmask immediates.
+def and_split_bitmask_imm64 : ImmLeaf<i64, [{
+  uint64_t UImm64 = static_cast<uint64_t>(Imm);
+  // Already encodable as a single AND (immediate).
+  if (AArch64_AM::isLogicalImmediate(UImm64, 64))
+    return false;
+
+  auto isSingleInstImm64 = [](uint64_t Imm) {
+    for (unsigned i = 0; i < 4; i++) {
+      uint64_t Mask = 0xFFFFULL << (i * 16);
+      // This immediate is suitable for a single MOV instruction.
+      if ((Imm & Mask) == Imm)
+        return true;
+    }
+    return false;
+  };
+
+  // Do not split immediates that a single MOVZ/MOVN can materialize; a MOV
+  // plus AND (register) needs two instructions either way.
+  if (isSingleInstImm64(UImm64))
+    return false;
+  if (isSingleInstImm64(~UImm64))
+    return false;
+
+  // A bitmask immediate is a run of consecutive ones. Take the constant
+  // 0b00000000001000000000010000000000, whose set bits are not consecutive.
+  // It can be split into the two bitmask immediates
+  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111;
+  // ANDing with both of them reproduces the original constant.
+  unsigned LowestBitSet = countTrailingZeros(UImm64);
+  unsigned HighestBitSet = Log2_64(UImm64);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint64_t NewImm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  // Create a mask that keeps the original bits and additionally sets every
+  // bit outside that range.
+  uint64_t NewImm2 = UImm64 | ~NewImm1;
+
+  // NewImm1 is a contiguous run of ones, so only NewImm2 needs checking. If
+  // the split value is not a valid bitmask immediate, do not split this
+  // constant.
+  if (!AArch64_AM::isLogicalImmediate(NewImm2, 64))
+    return false;
+  return true;
+}]>;
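The split arithmetic in the predicate above is easy to sanity-check outside of TableGen. A minimal standalone sketch (plain C++; splitMask is a hypothetical helper, and the GCC/Clang builtins __builtin_ctzll and __builtin_clzll stand in for llvm::countTrailingZeros and llvm::Log2_64):

#include <cassert>
#include <cstdint>

// Derive the two candidate masks for a 64-bit AND immediate whose set bits
// are not one contiguous run.
static void splitMask(uint64_t Imm, uint64_t &Imm1, uint64_t &Imm2) {
  unsigned LowestBitSet = __builtin_ctzll(Imm);        // countTrailingZeros
  unsigned HighestBitSet = 63 - __builtin_clzll(Imm);  // Log2_64
  Imm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
  Imm2 = Imm | ~Imm1;
}

int main() {
  // 0b...001000000000010000000000 from the comment above, i.e. 0x200400.
  uint64_t Imm1, Imm2;
  splitMask(0x200400, Imm1, Imm2);
  assert(Imm1 == 0x3FFC00);               // run of ones covering bits [10, 21]
  assert(Imm2 == 0xFFFFFFFFFFE007FFULL);  // original bits plus all bits outside
  assert((Imm1 & Imm2) == 0x200400);      // ANDing both masks recovers the value
  return 0;
}

The two asserted masks are exactly the #0x3ffc00 / #0xffe007ff pair that appears, in 32-bit form, in the new regression test further down.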
+def and_split_bitmask_imm32 : ImmLeaf<i32, [{
+  uint32_t UImm32 = static_cast<uint32_t>(Imm);
+  // Already encodable as a single AND (immediate).
+  if (AArch64_AM::isLogicalImmediate(UImm32, 32))
+    return false;
+
+  auto isSingleInstImm32 = [](uint32_t Imm) {
+    for (unsigned i = 0; i < 2; i++) {
+      uint32_t Mask = 0xFFFFU << (i * 16);
+      // This immediate is suitable for a single MOV instruction.
+      if ((Imm & Mask) == Imm)
+        return true;
+    }
+    return false;
+  };
+
+  // Do not split immediates that a single MOVZ/MOVN can materialize.
+  if (isSingleInstImm32(UImm32))
+    return false;
+  if (isSingleInstImm32(~UImm32))
+    return false;
+
+  // See and_split_bitmask_imm64 above for how the constant is split into two
+  // bitmask immediates.
+  unsigned LowestBitSet = countTrailingZeros(UImm32);
+  unsigned HighestBitSet = Log2_32(UImm32);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint32_t NewImm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  // Create a mask that keeps the original bits and additionally sets every
+  // bit outside that range.
+  uint32_t NewImm2 = UImm32 | ~NewImm1;
+
+  // NewImm1 is a contiguous run of ones, so only NewImm2 needs checking. If
+  // the split value is not a valid bitmask immediate, do not split this
+  // constant.
+  if (!AArch64_AM::isLogicalImmediate(NewImm2, 32))
+    return false;
+  return true;
+}]>;
+
+def first_bitmask_imm64_XFORM : SDNodeXForm<imm, [{
+  uint64_t Imm = N->getZExtValue();
+  unsigned LowestBitSet = countTrailingZeros(Imm);
+  unsigned HighestBitSet = Log2_64(Imm);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint64_t Imm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  uint64_t Imm1Enc = AArch64_AM::encodeLogicalImmediate(Imm1, 64);
+  return CurDAG->getTargetConstant(Imm1Enc, SDLoc(N), MVT::i64);
+}]>;
+
+def second_bitmask_imm64_XFORM : SDNodeXForm<imm, [{
+  uint64_t Imm = N->getZExtValue();
+  unsigned LowestBitSet = countTrailingZeros(Imm);
+  unsigned HighestBitSet = Log2_64(Imm);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint64_t Imm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  // Create a mask that keeps the original bits and additionally sets every
+  // bit outside that range.
+  uint64_t Imm2 = Imm | ~Imm1;
+  uint64_t Imm2Enc = AArch64_AM::encodeLogicalImmediate(Imm2, 64);
+  return CurDAG->getTargetConstant(Imm2Enc, SDLoc(N), MVT::i64);
+}]>;
+
+def first_bitmask_imm32_XFORM : SDNodeXForm<imm, [{
+  uint32_t Imm = N->getZExtValue();
+  unsigned LowestBitSet = countTrailingZeros(Imm);
+  unsigned HighestBitSet = Log2_32(Imm);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint32_t Imm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  uint32_t Imm1Enc = AArch64_AM::encodeLogicalImmediate(Imm1, 32);
+  return CurDAG->getTargetConstant(Imm1Enc, SDLoc(N), MVT::i32);
+}]>;
+
+def second_bitmask_imm32_XFORM : SDNodeXForm<imm, [{
+  uint32_t Imm = N->getZExtValue();
+  unsigned LowestBitSet = countTrailingZeros(Imm);
+  unsigned HighestBitSet = Log2_32(Imm);
+
+  // Create a mask filled with ones from the lowest set bit up to the highest
+  // set bit.
+  uint32_t Imm1 = (2ULL << HighestBitSet) - (1ULL << LowestBitSet);
+  // Create a mask that keeps the original bits and additionally sets every
+  // bit outside that range.
+  uint32_t Imm2 = Imm | ~Imm1;
+  uint32_t Imm2Enc = AArch64_AM::encodeLogicalImmediate(Imm2, 32);
+  return CurDAG->getTargetConstant(Imm2Enc, SDLoc(N), MVT::i32);
+}]>;
+
+def first_bitmask_imm64 : Operand<i64>, IntImmLeaf<i64, [{
+  return true;  // validated by and_split_bitmask_imm64 in the pattern
+}], first_bitmask_imm64_XFORM>;
+
+def second_bitmask_imm64 : Operand<i64>, IntImmLeaf<i64, [{
+  return true;
+}], second_bitmask_imm64_XFORM>;
+
+def first_bitmask_imm32 : Operand<i32>, IntImmLeaf<i32, [{
+  return true;  // validated by and_split_bitmask_imm32 in the pattern
+}], first_bitmask_imm32_XFORM>;
+
+def second_bitmask_imm32 : Operand<i32>, IntImmLeaf<i32, [{
+  return true;
+}], second_bitmask_imm32_XFORM>;
+
+def : Pat<(i64 (and GPR64:$src, (i64 and_split_bitmask_imm64:$imm))),
+          (ANDXri (ANDXri GPR64:$src, (i64 (first_bitmask_imm64:$imm))),
+                  (i64 (second_bitmask_imm64:$imm)))>;
+
+def : Pat<(i32 (and GPR32:$src, (i32 and_split_bitmask_imm32:$imm))),
+          (ANDWri (ANDWri GPR32:$src, (i32 (first_bitmask_imm32:$imm))),
+                  (i32 (second_bitmask_imm32:$imm)))>;
 
 // (register)
 defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
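The predicates and transforms above defer to AArch64_AM::isLogicalImmediate and AArch64_AM::encodeLogicalImmediate. As a rough standalone sketch of the rule those helpers implement (illustrative C++, not the real AArch64_AM code): a bitmask immediate is a power-of-two-sized element of 2 to 64 bits that is a rotation of a contiguous run of ones, replicated across the register. That is why 0x200400 is rejected (two separate runs) while the split halves 0x3FFC00 (one run) and 0xFFFFFFFFFFE007FF (one run that wraps around) are accepted:

#include <cassert>
#include <cstdint>

// Rotate a 'Size'-bit value left by one within its element.
static uint64_t rotL1(uint64_t V, unsigned Size) {
  uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  return ((V << 1) | (V >> (Size - 1))) & Mask;
}

// Sketch: is V encodable as a 64-bit AArch64 bitmask (logical) immediate?
static bool isBitmaskImm64(uint64_t V) {
  if (V == 0 || V == ~0ULL)
    return false;  // all-zeros and all-ones are not encodable
  // Find the smallest power-of-two element size that replicates to V.
  unsigned Size = 64;
  while (Size > 2) {
    unsigned Half = Size / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    if ((V & Mask) != ((V >> Half) & Mask))
      break;
    Size = Half;
  }
  uint64_t Elt = V & ((Size == 64) ? ~0ULL : ((1ULL << Size) - 1));
  // The element must be a rotated contiguous run of ones: exactly one
  // circular 0 -> 1 transition.
  return __builtin_popcountll(Elt & ~rotL1(Elt, Size)) == 1;
}

int main() {
  assert(!isBitmaskImm64(0x200400));              // two runs: not encodable
  assert(isBitmaskImm64(0x3FFC00));               // one run: encodable
  assert(isBitmaskImm64(0xFFFFFFFFFFE007FFULL));  // one wrap-around run
  assert(isBitmaskImm64(0x5555555555555555ULL));  // replicated 2-bit element
  return 0;
}

Note that the 32-bit mask 0xFFE007FF is only encodable as a 32-bit element; zero-extended to 64 bits it would have two runs, which is why the ANDWri pattern encodes it with size 32.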
Index: llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i8 @test(i32 %a) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w8, w0, #0x3ffc00
+; CHECK-NEXT:    and w8, w8, #0xffe007ff
+; CHECK-NEXT:    cmp w8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %a, 2098176
+  %cmp = icmp eq i32 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
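This test relies on the soundness condition of the rewrite: (x & Imm1) & Imm2 equals x & Imm whenever Imm1 & Imm2 == Imm. A quick brute-force spot check of that identity for the test's constant (standalone C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Imm = 2098176;      // 0x200400, not a valid bitmask immediate
  const uint32_t Imm1 = 0x3FFC00;    // first AND mask from the CHECK lines
  const uint32_t Imm2 = 0xFFE007FF;  // second AND mask
  assert((Imm1 & Imm2) == Imm);      // the split masks intersect back to Imm
  // Since AND is associative, the mask identity already implies equivalence;
  // sweep a sample of inputs anyway as a sanity check.
  for (uint64_t X = 0; X <= 0xFFFFFFFFULL; X += 0x10001)
    assert((uint32_t(X) & Imm1 & Imm2) == (uint32_t(X) & Imm));
  return 0;
}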
Index: llvm/test/CodeGen/AArch64/arm64-ccmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -370,9 +370,8 @@
 ; CHECK-NEXT:    cmp w8, #37
 ; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:    lsl x8, x8, xzr
-; CHECK-NEXT:    mov x9, #31
-; CHECK-NEXT:    movk x9, #48, lsl #32
-; CHECK-NEXT:    and x8, x8, x9
+; CHECK-NEXT:    and x8, x8, #0x3f
+; CHECK-NEXT:    and x8, x8, #0xffffffffffffffdf
 ; CHECK-NEXT:    ccmp x8, #0, #4, ls
 ; CHECK-NEXT:    b.eq LBB11_2
 ; CHECK-NEXT:    ; %bb.1: ; %if.end85
Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -9,8 +9,8 @@
 define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
 ; CHECK-LABEL: from_clang:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #135
-; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    and w8, w8, #0xffffff87
 ; CHECK-NEXT:    bfi w8, w1, #3, #4
 ; CHECK-NEXT:    and x9, x0, #0xffffff00
 ; CHECK-NEXT:    orr x0, x8, x9
@@ -97,8 +97,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    mov w10, #135
-; CHECK-NEXT:    and w8, w8, w10
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    and w8, w8, #0xffffff87
 ; CHECK-NEXT:    bfi w8, w9, #3, #4
 ; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
@@ -167,10 +167,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    mov w10, #135
-; CHECK-NEXT:    mov w11, #632
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    and w9, w11, w9, lsl #3
+; CHECK-NEXT:    mov w10, #632
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    and w8, w8, #0xffffff87
+; CHECK-NEXT:    and w9, w10, w9, lsl #3
 ; CHECK-NEXT:    orr w8, w8, w9
 ; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
@@ -193,10 +193,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    ldr x9, [x1]
-; CHECK-NEXT:    mov w10, #135
-; CHECK-NEXT:    and x8, x8, x10
-; CHECK-NEXT:    lsl w9, w9, #3
 ; CHECK-NEXT:    mov w10, #664
+; CHECK-NEXT:    and x8, x8, #0xff
+; CHECK-NEXT:    lsl w9, w9, #3
+; CHECK-NEXT:    and x8, x8, #0xffffffffffffff87
 ; CHECK-NEXT:    and x9, x9, x10
 ; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    str x8, [x0]
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -245,10 +245,9 @@
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-LABEL: n0_badconstmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #256
-; CHECK-NEXT:    movk w9, #65280, lsl #16
+; CHECK-NEXT:    and w9, w1, #0xffffff00
 ; CHECK-NEXT:    and w8, w0, #0xffff00
-; CHECK-NEXT:    and w9, w1, w9
+; CHECK-NEXT:    and w9, w9, #0xff0001ff
 ; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mx = and i32 %x, 16776960
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
@@ -21,10 +21,10 @@
 define i16 @out16_constmask(i16 %x, i16 %y) {
 ; CHECK-LABEL: out16_constmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #3855
-; CHECK-NEXT:    mov w9, #-3856
-; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    and w9, w1, w9
+; CHECK-NEXT:    and w8, w0, #0xfff
+; CHECK-NEXT:    and w9, w1, #0xfffffff0
+; CHECK-NEXT:    and w8, w8, #0xffffff0f
+; CHECK-NEXT:    and w9, w9, #0xfffff0ff
 ; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mx = and i16 %x, 3855
@@ -80,8 +80,8 @@
 ; CHECK-LABEL: in16_constmask:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    mov w9, #3855
-; CHECK-NEXT:    and w8, w8, w9
+; CHECK-NEXT:    and w8, w8, #0xfff
+; CHECK-NEXT:    and w8, w8, #0xffffff0f
 ; CHECK-NEXT:    eor w0, w8, w1
 ; CHECK-NEXT:    ret
   %n0 = xor i16 %x, %y