Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1941,6 +1941,65 @@
 def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                    logical_imm64:$imm), 0>;
 
+// If the constant operand of an AND (immediate) is not a valid bitmask
+// immediate, try to split it into two valid bitmask immediates.
+def and_split_bitmask_imm64 : ImmLeaf<i64, [{
+  uint64_t UImm64 = static_cast<uint64_t>(Imm);
+  return AArch64_AM::isValidAndSplitBitmaskImm(UImm64, 64);
+}]>;
+
+def and_split_bitmask_imm32 : ImmLeaf<i32, [{
+  uint32_t UImm32 = static_cast<uint32_t>(Imm);
+  return AArch64_AM::isValidAndSplitBitmaskImm(UImm32, 32);
+}]>;
+
+def first_bitmask_imm64_XFORM : SDNodeXForm<imm, [{
+  uint64_t UImm64 = static_cast<uint64_t>(N->getZExtValue());
+  uint64_t Imm1Enc = AArch64_AM::splitAndBitmaskImm(UImm64, 64, true);
+  return CurDAG->getTargetConstant(Imm1Enc, SDLoc(N), MVT::i64);
+}]>;
+
+def second_bitmask_imm64_XFORM : SDNodeXForm<imm, [{
+  uint64_t UImm64 = static_cast<uint64_t>(N->getZExtValue());
+  uint64_t Imm2Enc = AArch64_AM::splitAndBitmaskImm(UImm64, 64, false);
+  return CurDAG->getTargetConstant(Imm2Enc, SDLoc(N), MVT::i64);
+}]>;
+
+def first_bitmask_imm32_XFORM : SDNodeXForm<imm, [{
+  uint32_t UImm32 = static_cast<uint32_t>(N->getZExtValue());
+  uint32_t Imm1Enc = AArch64_AM::splitAndBitmaskImm(UImm32, 32, true);
+  return CurDAG->getTargetConstant(Imm1Enc, SDLoc(N), MVT::i32);
+}]>;
+
+def second_bitmask_imm32_XFORM : SDNodeXForm<imm, [{
+  uint32_t UImm32 = static_cast<uint32_t>(N->getZExtValue());
+  uint32_t Imm2Enc = AArch64_AM::splitAndBitmaskImm(UImm32, 32, false);
+  return CurDAG->getTargetConstant(Imm2Enc, SDLoc(N), MVT::i32);
+}]>;
+
+def first_bitmask_imm64 : Operand<i64>,
+    IntImmLeaf<i64, [{ return true; }], first_bitmask_imm64_XFORM> {}
+
+def second_bitmask_imm64 : Operand<i64>,
+    IntImmLeaf<i64, [{ return true; }], second_bitmask_imm64_XFORM> {}
+
+def first_bitmask_imm32 : Operand<i32>,
+    IntImmLeaf<i32, [{ return true; }], first_bitmask_imm32_XFORM> {}
+
+def second_bitmask_imm32 : Operand<i32>,
+    IntImmLeaf<i32, [{ return true; }], second_bitmask_imm32_XFORM> {}
+
+def : Pat<(i64 (and GPR64:$src, (i64 and_split_bitmask_imm64:$imm))),
+          (ANDXri (ANDXri GPR64:$src, (i64 (first_bitmask_imm64:$imm))),
+                  (i64 (second_bitmask_imm64:$imm)))>;
+
+def : Pat<(i32 (and GPR32:$src, (i32 and_split_bitmask_imm32:$imm))),
+          (ANDWri (ANDWri GPR32:$src, (i32 (first_bitmask_imm32:$imm))),
+                  (i32 (second_bitmask_imm32:$imm)))>;
 
 // (register)
 defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
===================================================================
--- llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
 #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
 
+#include "AArch64ExpandImm.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/bit.h"
@@ -337,6 +338,66 @@
   return true;
 }
 
+template <typename T>
+static inline bool isValidAndSplitBitmaskImm(T Imm, unsigned regSize) {
+  assert((regSize == 32 || regSize == 64) &&
+         "Invalid regSize for AndSplitBitmaskImm");
+  T UImm = static_cast<T>(Imm);
+  if (isLogicalImmediate(UImm, regSize))
+    return false;
+
+  // If this immediate can be handled by one instruction, do not split it.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(UImm, regSize, Insn);
+  if (Insn.size() == 1)
+    return false;
+
+  // A bitmask immediate consists of consecutive ones. For example, the
+  // constant 0b00000000001000000000010000000000 is not a valid bitmask
+  // immediate because its set bits are not consecutive, but it can be split
+  // into the two bitmask immediates 0b00000000001111111111110000000000 and
+  // 0b11111111111000000000011111111111.
+  // ANDing these two bitmask immediates produces the original constant.
+  unsigned LowestBitSet = countTrailingZeros(UImm);
+  unsigned HighestBitSet = Log2_64(UImm);
+
+  // Create a mask which is filled with ones from the position of the lowest
+  // set bit to the position of the highest set bit.
+  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
+              (static_cast<T>(1) << LowestBitSet);
+  // Create a mask which keeps the original bits and is filled with ones
+  // outside the range between the lowest and the highest set bit.
+  T NewImm2 = UImm | ~NewImm1;
+
+  // If the split value is not a valid bitmask immediate, do not split this
+  // constant.
+  if (!isLogicalImmediate(NewImm2, regSize))
+    return false;
+  return true;
+}
+
+template <typename T>
+static inline T splitAndBitmaskImm(T Imm, unsigned regSize, bool FirstImm) {
+  assert((regSize == 32 || regSize == 64) &&
+         "Invalid regSize for AndSplitBitmaskImm");
+
+  unsigned LowestBitSet = countTrailingZeros(Imm);
+  unsigned HighestBitSet = Log2_64(Imm);
+
+  // Create a mask which is filled with ones from the position of the lowest
+  // set bit to the position of the highest set bit.
+  T Imm1 = (static_cast<T>(2) << HighestBitSet) -
+           (static_cast<T>(1) << LowestBitSet);
+  T Imm1Enc = encodeLogicalImmediate(Imm1, regSize);
+  if (FirstImm)
+    return Imm1Enc;
+
+  // Create a mask which keeps the original bits and is filled with ones
+  // outside the range between the lowest and the highest set bit.
+  T Imm2 = Imm | ~Imm1;
+  T Imm2Enc = encodeLogicalImmediate(Imm2, regSize);
+  return Imm2Enc;
+}
+
 //===----------------------------------------------------------------------===//
 // Floating-point Immediates
 //
Index: llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i8 @test1(i32 %a) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and w8, w0, #0x3ffc00
+; CHECK-NEXT:    and w8, w8, #0xffe007ff
+; CHECK-NEXT:    cmp w8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %a, 2098176
+  %cmp = icmp eq i32 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i8 @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    cmp w8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %a, 135
+  %cmp = icmp eq i32 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i8 @test3(i32 %a) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    cmp w8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %a, 2163712
+  %cmp = icmp eq i32 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+define i8 @test4(i64 %a) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and x8, x0, #0x3ffc00
+; CHECK-NEXT:    and x8, x8, #0xffffffffffe007ff
+; CHECK-NEXT:    cmp x8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i64 %a, 2098176
+  %cmp = icmp eq i64 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i8 @test5(i64 %a) {
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    cmp x8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i64 %a, 135
+  %cmp = icmp eq i64 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i8 @test6(i64 %a) {
+; CHECK-LABEL: test6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    cmp x8, #1024
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %and = and i64 %a, 2163712
+  %cmp = icmp eq i64 %and, 1024
+  %conv = zext i1 %cmp to i8
+  ret i8 %conv
+}
+
+; The split bitmask immediates should be hoisted outside the loop.
+define void @test7(i64 %a, i64* noalias %src, i64* noalias %dst, i64 %n) {
+; CHECK-LABEL: test7:
+; CHECK:       // %bb.0: // %loop.ph
+; CHECK-NEXT:    and x9, x0, #0x3ffc00
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    and x9, x9, #0xffffffffffe007ff
+; CHECK-NEXT:    b .LBB6_2
+; CHECK-NEXT:  .LBB6_1: // %for.inc
+; CHECK-NEXT:    // in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    add x8, x8, #1
+; CHECK-NEXT:    cmp x8, x3
+; CHECK-NEXT:    b.gt .LBB6_4
+; CHECK-NEXT:  .LBB6_2: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    b.hs .LBB6_1
+; CHECK-NEXT:  // %bb.3: // %if.then
+; CHECK-NEXT:    // in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    lsl x10, x8, #3
+; CHECK-NEXT:    ldr x11, [x1, x10]
+; CHECK-NEXT:    str x11, [x2, x10]
+; CHECK-NEXT:    b .LBB6_1
+; CHECK-NEXT:  .LBB6_4: // %exit
+; CHECK-NEXT:    ret
+loop.ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %inc, %for.inc ], [ 0, %loop.ph ]
+  %and = and i64 %a, 2098176
+  %cmp = icmp ult i64 %iv, %and
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %src.arrayidx = getelementptr inbounds i64, i64* %src, i64 %iv
+  %val = load i64, i64* %src.arrayidx
+  %dst.arrayidx = getelementptr inbounds i64, i64* %dst, i64 %iv
+  store i64 %val, i64* %dst.arrayidx
+  br label %for.inc
+
+if.else:
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i64 %iv, 1
+  %cond = icmp sgt i64 %inc, %n
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
Index: llvm/test/CodeGen/AArch64/arm64-ccmp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -370,9 +370,8 @@
 ; CHECK-NEXT:    cmp w8, #37
 ; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:    lsl x8, x8, xzr
-; CHECK-NEXT:    mov x9, #31
-; CHECK-NEXT:    movk x9, #48, lsl #32
-; CHECK-NEXT:    and x8, x8, x9
+; CHECK-NEXT:    and x8, x8, #0x3fffffffff
+; CHECK-NEXT:    and x8, x8, #0xfffffff00000001f
 ; CHECK-NEXT:    ccmp x8, #0, #4, ls
 ; CHECK-NEXT:    b.eq LBB11_2
 ; CHECK-NEXT:    ; %bb.1: ; %if.end85
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -245,10 +245,9 @@
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-LABEL: n0_badconstmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #256
-; CHECK-NEXT:    movk w9, #65280, lsl #16
+; CHECK-NEXT:    and w9, w1, #0xffffff00
 ; CHECK-NEXT:    and w8, w0, #0xffff00
-; CHECK-NEXT:    and w9, w1, w9
+; CHECK-NEXT:    and w9, w9, #0xff0001ff
 ; CHECK-NEXT:    orr w0, w8, w9
   %mx = and i32 %x, 16776960
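
As an illustration (not part of the diff above), the arithmetic performed by isValidAndSplitBitmaskImm and splitAndBitmaskImm can be checked in isolation. The following minimal standalone C++ sketch replays the split on the constant from test1 (2098176 = 0x00200400); Imm, Lo, Hi, Mask1, and Mask2 are hypothetical local names, and unlike the real helpers this sketch does not reject constants that a single MOV or AND could already handle.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // 0x00200400 has only bits 10 and 21 set; the ones are not consecutive,
  // so it is not a valid AArch64 logical (bitmask) immediate.
  uint32_t Imm = 0x00200400;
  unsigned Lo = __builtin_ctz(Imm);      // lowest set bit: 10 (GCC/Clang builtin)
  unsigned Hi = 31 - __builtin_clz(Imm); // highest set bit: 21

  // Mask1: consecutive ones covering [Lo, Hi] -- a run of ones, so it is
  // encodable as a logical immediate.
  uint32_t Mask1 = (UINT32_C(2) << Hi) - (UINT32_C(1) << Lo);
  // Mask2: the original bits plus ones everywhere outside [Lo, Hi]; the
  // patch only splits when this second mask is encodable as well.
  uint32_t Mask2 = Imm | ~Mask1;

  // ANDing the two masks recovers the original constant, which is why
  //   and w8, w0, #0x3ffc00
  //   and w8, w8, #0xffe007ff
  // in test1 computes w0 & 0x200400 without a mov/movk pair.
  assert((Mask1 & Mask2) == Imm);
  std::printf("Mask1=0x%08X Mask2=0x%08X\n", static_cast<unsigned>(Mask1),
              static_cast<unsigned>(Mask2)); // Mask1=0x003FFC00 Mask2=0xFFE007FF
  return 0;
}

The same computation with T = uint64_t reproduces the 64-bit checks in test4 (#0x3ffc00 followed by #0xffffffffffe007ff).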