diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp --- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp @@ -374,7 +374,276 @@ } } - // FIXME: Add more two-instruction sequences. + // Check if we can use one instruction to load the low half, and a MOVK + // to load the high half. + if (getChunk(UImm, 2) == 0 || getChunk(UImm, 3) == 0) { + uint64_t Low32Imm = UImm & 0xFFFFFFFFULL; + // 32-bit MOVN followed by MOVK. + if (getChunk(UImm, 0) == Mask || getChunk(UImm, 1) == Mask) { + // Create the MOVN instruction. + unsigned Imm16_Low = getChunk(UImm, 0); + unsigned Shift_Low = 0; + if (Imm16_Low == Mask) { + Imm16_Low = getChunk(UImm, 1); + Shift_Low = 16; + } + Insn.push_back({AArch64::MOVNWi, Imm16_Low, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_Low)}); + + // Create the MOVK instruction. + unsigned Imm16_High = getChunk(UImm, 2); + unsigned Shift_High = 32; + if (Imm16_High == 0) { + Imm16_High = getChunk(UImm, 3); + Shift_High = 48; + } + Insn.push_back({AArch64::MOVKXi, Imm16_High, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_High)}); + return; + } + + // 32-bit ORR followed by MOVK. + if (AArch64_AM::processLogicalImmediate(Low32Imm, 32, Encoding)) { + // Create the ORR-immediate instruction. + Insn.push_back({AArch64::ORRWri, 0, Encoding}); + + // Create the MOVK instruction. + unsigned Imm16 = getChunk(UImm, 2); + unsigned Shift = 32; + if (Imm16 == 0) { + Imm16 = getChunk(UImm, 3); + Shift = 48; + } + Insn.push_back({AArch64::MOVKXi, Imm16, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)}); + return; + } + } + + // Try 32-bit MOVN followed by 64-bit ORR. + // + // We try to construct the ORR immediate in two different ways: either it + // only sets bits in the high half, or it sets the same bits in both halves. 
+ if (getChunk(UImm, 0) == Mask || getChunk(UImm, 1) == Mask) { + uint64_t HighHalf = UImm & 0xFFFFFFFF00000000; + uint64_t HighReplicate = HighHalf | (HighHalf >> 32); + if (AArch64_AM::processLogicalImmediate(HighHalf, BitSize, Encoding) || + (UImm == (UImm | HighReplicate) && + AArch64_AM::processLogicalImmediate(HighReplicate, BitSize, + Encoding))) { + // Create the MOVN instruction. + unsigned Imm16_Low = getChunk(UImm, 0); + unsigned Shift_Low = 0; + if (Imm16_Low == Mask) { + Imm16_Low = getChunk(UImm, 1); + Shift_Low = 16; + } + Insn.push_back({AArch64::MOVNWi, Imm16_Low, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_Low)}); + + // Create the ORR instruction + Insn.push_back({AArch64::ORRXri, 0, Encoding}); + return; + } + } + + // Try 32-bit MOVN followed by 64-bit EOR. + // + // We try to construct the EOR immediate in two different ways: either it's + // a bitmask which inverts bits from the MOVN, or it inverts the same bits + // in the top and bottom halves. (A bitmask which doesn't invert any bits + // in the low half is redundant with MOVN+ORR.) + { + uint64_t HighHalf = UImm & 0xFFFFFFFF00000000; + uint64_t HighReplicate = HighHalf | (HighHalf >> 32); + uint64_t XorImm = UImm ^ HighReplicate; + if ((getChunk(XorImm, 0) == Mask || getChunk(XorImm, 1) == Mask) && + AArch64_AM::processLogicalImmediate(HighReplicate, BitSize, + Encoding)) { + // Create the MOVN instruction. 
+ unsigned Imm16_Low = getChunk(XorImm, 0) ^ 0xFFFF; + unsigned Shift_Low = 0; + if (Imm16_Low == 0) { + Imm16_Low = getChunk(XorImm, 1) ^ 0xFFFF; + Shift_Low = 16; + } + Insn.push_back({AArch64::MOVNWi, Imm16_Low, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_Low)}); + + // Create the EOR instruction + Insn.push_back({AArch64::EORXri, 0, Encoding}); + return; + } + } + { + uint64_t XorImm1 = UImm ^ ((UImm & 0xFFFF) | 0xFFFF0000); + if (AArch64_AM::processLogicalImmediate(XorImm1, BitSize, Encoding)) { + unsigned Imm16_Low = getChunk(UImm, 0) ^ 0xFFFF; + unsigned Shift_Low = 0; + Insn.push_back({AArch64::MOVNWi, Imm16_Low, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_Low)}); + + // Create the EOR instruction + Insn.push_back({AArch64::EORXri, 0, Encoding}); + return; + } + uint64_t XorImm2 = UImm ^ ((UImm & 0xFFFF0000) | 0xFFFF); + if (AArch64_AM::processLogicalImmediate(XorImm2, BitSize, Encoding)) { + unsigned Imm16_Low = getChunk(UImm, 1) ^ 0xFFFF; + unsigned Shift_Low = 16; + Insn.push_back({AArch64::MOVNWi, Imm16_Low, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift_Low)}); + + // Create the EOR instruction + Insn.push_back({AArch64::EORXri, 0, Encoding}); + return; + } + } + + // Try 64-bit ORR+ORR immediate. + { + auto IsTwoMasks = [](uint64_t I) { + return isShiftedMask_64(I & ((I | (I - 1)) + 1)); + }; + unsigned Size = 64; + uint64_t OrrImm = UImm; + do { + // Check for two bitmasks with the same width. 
+ // FIXME: Handle the case where one of the masks wraps: + // IsTwoMasks(~SignExtend64(OrrImm, Size)) + if (IsTwoMasks(OrrImm)) { + uint64_t HighMask = OrrImm & ((OrrImm | (OrrImm - 1)) + 1); + uint64_t LowMask = OrrImm ^ HighMask; + assert(isShiftedMask_64(LowMask) && isShiftedMask_64(HighMask) && + OrrImm == (LowMask | HighMask)); + while (Size < 64) { + HighMask |= HighMask << Size; + LowMask |= LowMask << Size; + Size *= 2; + } + // Create the low ORR instruction + bool Result = AArch64_AM::processLogicalImmediate(LowMask, BitSize, + Encoding); + assert(Result); + (void)Result; + Insn.push_back({AArch64::ORRXri, 0, Encoding}); + + // Create the high ORR instruction + bool Result2 = AArch64_AM::processLogicalImmediate(HighMask, BitSize, + Encoding); + assert(Result2); + (void)Result2; + Insn.push_back({AArch64::ORRXri, 0, Encoding}); + return; + } + Size /= 2; + uint64_t OrrHalf = (1ULL << Size) - 1; + if ((OrrImm >> Size) != (OrrImm & OrrHalf)) + break; + OrrImm >>= Size; + } while (Size > 4); + // TODO: Look for a bitmask plus a narrower pattern (0xAFFFFFAAAAAAAAAA) + } + + // MOVZ+ADD, no carry + // Only check for the case where the MOVZ sets bits in the high half, + // and the ADD immediate is shifted; otherwise, we emit MOVZ+MOVK. + // MOVZ+ADD with carry is redundant with either MOVZ+MOVK or ORR+ADD. + if (!(UImm & ~0xFFFF000000FFF000) || + !(UImm & ~0x0000FFFF00FFF000)) { + unsigned Shift = (UImm & 0x0000FFFF00000000) ? 32 : 48; + unsigned Imm16 = (UImm >> Shift) & 0xFFFF; + Insn.push_back({AArch64::MOVZXi, Imm16, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)}); + Insn.push_back({AArch64::ADDXri, (UImm >> 12) & 0xFFF, 12}); + return; + } + + // MOVN+SUB, no carry + // Only check for the case where the MOVN sets bits in the high half, + // and the SUB immediate is shifted; otherwise, we emit MOVN+MOVK. + // MOVN+SUB with carry is redundant with either MOVN+MOVK or ORR+SUB. 
+ if (!(~UImm & ~0xFFFF000000FFF000) || + !(~UImm & ~0x0000FFFF00FFF000)) { + unsigned Shift = (~UImm & 0x0000FFFF00000000) ? 32 : 48; + unsigned Imm16 = (~UImm >> Shift) & 0xFFFF; + Insn.push_back({AArch64::MOVNXi, Imm16, + AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)}); + Insn.push_back({AArch64::SUBXri, 0xFFF - ((UImm >> 12) & 0xFFF), 12}); + return; + } + + // ORR+ADD/SUB, no carry out of the bottom 24 bits. + { + uint64_t ReplaceZeros = UImm & 0xFFFFFFFFFF000FFF; + uint64_t ReplaceOnes = UImm | 0x0000000000FFF000; + uint64_t ReplaceRepl = (UImm & ~0x00FFF000) | ((UImm >> 32) & 0x00FFF000); + uint64_t OrrImm = 0; + if (AArch64_AM::processLogicalImmediate(ReplaceZeros, BitSize, Encoding)) + OrrImm = ReplaceZeros; + else if (AArch64_AM::processLogicalImmediate(ReplaceOnes, BitSize, + Encoding)) + OrrImm = ReplaceOnes; + else if (AArch64_AM::processLogicalImmediate(ReplaceRepl, BitSize, + Encoding)) + OrrImm = ReplaceRepl; + if (OrrImm) { + // Create ORR + Insn.push_back({AArch64::ORRXri, 0, Encoding}); + + // Create either ADD or SUB, depending on the difference between the + // result of the ORR and the expected value. + if ((int64_t)(UImm - OrrImm) < 0) + Insn.push_back({AArch64::SUBXri, ((OrrImm - UImm) >> 12) & 0xFFF, 12}); + else + Insn.push_back({AArch64::ADDXri, ((UImm - OrrImm) >> 12) & 0xFFF, 12}); + return; + } + } + +#if 0 + // TODO: ORR+ADD/SUB with carry, top and bottom half of ORR are equal. + // We divide this into two checks, based on whether or not the carry + // propagates into the top half. + // (0xFFFEFFFF00FEEFFE == 0xFFFEFFFEFFFEFFFE+0000000000FFF000) + // HalfNoCarry = UImm >> 32; + // HalfCarry = (uint32_t)(HalfNoCarry + 1); + // ImmNoCarry = HalfNoCarry | (HalfNoCarry << 32); + // ImmCarry = HalfCarry | (HalfCarry << 32); + + // TODO: ORR+ADD/SUB with carry, ORR is a bitmask. + // (e.g. 0x1000000000ABCFFF == 0x0FFFFFFFFFFFFFFF + 0x00ABD000) + // (e.g. 
0xFFFFFFFEFFEFCFFF == 0xFFFFFFFF0000FFFF - 0x00113000) + // For add, subtract 0x01000000, mask out 0x00FFF000, check the + // result is a mask or inverted mask. For sub, add 0x01000000. + + // TODO: MOVZ+ADD with carry (0xFFFE0000 + 0x00123000) + // XXX is this redundant with ORR+ADD/SUB with carry? + if (!(UImm & 0xFFF) && UImm - 0xFFFF0000 <= 0x00FFF000) { + uint64_t AddAmt = (UImm - 0xFFFF0000) >> 12; + // Emit mov x0, #0xFFFF0000 + // Emit add x0, x0, #0x00XXX000 + } + + // TODO: MOVN+SUB with carry (0xFFFFFFFF0001FFFF - 0x00123000) + // XXX is this redundant with ORR+ADD/SUB with carry? + if ((UImm & 0xFFF) == 0xFFF && UImm + 0x00FFF000 >= 0xFFFFFFFF0000FFFF) { + // Emit mov x0, #0xFFFFFFFF0000FFFF + // Emit sub x0, x0, ??? + } + // TODO: MOVZ+SUB (with carry) (0x1234000000000000 - 0x00123000): + // add 0x01000000, check that only bits 0xFFFF000000FFF000 are set. + // TODO: MOVN+ADD (with carry) + + // TODO: Try 64-bit ORR followed by AND. (0x0AAAA0000AAAA000) + // TODO: Try 64-bit ORR followed by EOR (0xAA55555AAAAAAAAA) + // TODO: Try 32-bit ORR followed by 64-bit ORR (0x003FFFE0AAAAAAAA) + // TODO: Try 32-bit ORR followed by 64-bit EOR (0x000FFFFF11EEEEEE) + // TODO: Try any MOV followed by ROR imm (0xFF000000001234FF) + // TODO: Try any MOV followed by SBFM/UBFM (0xFFFAAAAAAA000000) + // TODO: Try misc slow sequences with optsize: MOV+BFM, + // MOV+MLA, MOV+ORR/EOR/ADD shifted register. +#endif // Three instruction sequences. // diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -134,38 +134,67 @@ { default: llvm_unreachable("unhandled!"); break; + case AArch64::EORXri: case AArch64::ORRWri: - case AArch64::ORRXri: + case AArch64::ORRXri: { + // If BitSize is 64, but we're using a 32-bit instruction, fix the + // destination register. 
+ Register CurDst = DstReg; + if (BitSize == 64 && I->Opcode == AArch64::ORRWri) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + CurDst = TRI->getSubReg(DstReg, AArch64::sub_32); + } + // The source is xzr/wzr if this is the first instruction of the + // sequence; otherwise, the source is the value produced by the + // previous instruction. + Register CurSrc = CurDst; + if (I == Insn.begin()) + CurSrc = I->Opcode == AArch64::ORRWri ? AArch64::WZR : AArch64::XZR; + bool DstIsDead = MI.getOperand(0).isDead(); MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) - .add(MI.getOperand(0)) - .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) + .addReg(CurDst, RegState::Define | + getDeadRegState(DstIsDead && LastItem) | + RenamableState) + .addReg(CurSrc) .addImm(I->Op2)); break; + } case AArch64::MOVNWi: case AArch64::MOVNXi: case AArch64::MOVZWi: case AArch64::MOVZXi: { + // If BitSize is 64, but we're using a 32-bit instruction, fix the + // destination register. 
+ assert((BitSize == 32 || I->Opcode != AArch64::MOVZWi) && + "MOVZWi should only be used to produce 32-bit values"); + Register CurDst = DstReg; + if (BitSize == 64 && I->Opcode == AArch64::MOVNWi) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + CurDst = TRI->getSubReg(DstReg, AArch64::sub_32); + } bool DstIsDead = MI.getOperand(0).isDead(); MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) - .addReg(DstReg, RegState::Define | + .addReg(CurDst, RegState::Define | getDeadRegState(DstIsDead && LastItem) | RenamableState) .addImm(I->Op1) .addImm(I->Op2)); - } break; + break; + } + case AArch64::ADDXri: + case AArch64::SUBXri: case AArch64::MOVKWi: case AArch64::MOVKXi: { - Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) - .addReg(DstReg, - RegState::Define | + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && LastItem) | RenamableState) .addReg(DstReg) .addImm(I->Op1) .addImm(I->Op2)); - } break; + break; + } } } transferImpOps(MI, MIBS.front(), MIBS.back()); diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll --- a/llvm/test/CodeGen/AArch64/arm64-movi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-eabi | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s ;==--------------------------------------------------------------------------== ; Tests for MOV-immediate implemented with ORR-immediate. 
@@ -132,15 +132,15 @@ ret i64 -279156097024 } -; FIXME: prefer "mov w0, #-63; movk x0, #17, lsl #32" -define i64 @mvn32_pattern_2() nounwind { -; CHECK-LABEL: mvn32_pattern_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #65473 -; CHECK-NEXT: movk x0, #65535, lsl #16 -; CHECK-NEXT: movk x0, #17, lsl #32 +; rdar://13944082 +define i64 @g() nounwind { +; CHECK-LABEL: g: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x0, #2 +; CHECK-NEXT: movk x0, #65535, lsl #48 ; CHECK-NEXT: ret - ret i64 77309411265 +entry: + ret i64 -281474976710654 } ;==--------------------------------------------------------------------------== @@ -164,6 +164,16 @@ ret i64 -176093720012 } +define i64 @movn_movk_movk() nounwind { +; CHECK-LABEL: movn_movk_movk: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-65281 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: movk x0, #65520, lsl #48 +; CHECK-NEXT: ret + ret i64 -4222125209747201 +} + ;==--------------------------------------------------------------------------== ; Tests for ORR with MOVK. 
;==--------------------------------------------------------------------------== @@ -264,16 +274,6 @@ ret i64 1152921504047824640 } -define i64 @orr_movk11() nounwind { -; CHECK-LABEL: orr_movk11: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-65281 -; CHECK-NEXT: movk x0, #57005, lsl #16 -; CHECK-NEXT: movk x0, #65520, lsl #48 -; CHECK-NEXT: ret - ret i64 -4222125209747201 -} - define i64 @orr_movk12() nounwind { ; CHECK-LABEL: orr_movk12: ; CHECK: // %bb.0: @@ -293,17 +293,6 @@ ret i64 -2401245434149282131 } -; rdar://13944082 -define i64 @g() nounwind { -; CHECK-LABEL: g: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x0, #2 -; CHECK-NEXT: movk x0, #65535, lsl #48 -; CHECK-NEXT: ret -entry: - ret i64 -281474976710654 -} - define i64 @orr_movk14() nounwind { ; CHECK-LABEL: orr_movk14: ; CHECK: // %bb.0: @@ -321,29 +310,17 @@ ; CHECK-NEXT: ret ret i64 549621596159 } - -; FIXME: prefer "mov x0, #2147483646; orr x0, x0, #36028659580010496" define i64 @orr_movk16() nounwind { ; CHECK-LABEL: orr_movk16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #36028659580010496 -; CHECK-NEXT: movk x0, #65534 -; CHECK-NEXT: movk x0, #32767, lsl #16 -; CHECK-NEXT: ret - ret i64 36028661727494142 -} - -define i64 @orr_movk17() nounwind { -; CHECK-LABEL: orr_movk17: -; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-1099511627776 ; CHECK-NEXT: movk x0, #65280, lsl #16 ; CHECK-NEXT: ret ret i64 -1095233437696 } -define i64 @orr_movk18() nounwind { -; CHECK-LABEL: orr_movk18: +define i64 @orr_movk17() nounwind { +; CHECK-LABEL: orr_movk17: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #137438887936 ; CHECK-NEXT: movk x0, #65473 @@ -362,88 +339,213 @@ ret i64 1103823438080 } -; FIXME: prefer "mov w0, #-1431655766; movk x0, #9, lsl #32" -define i64 @movn_movk() nounwind { -; CHECK-LABEL: movn_movk: +; FIXME: prefer "mov x0, #-6148914691236517206; orr x0, x0, #0x0FFFFF0000000000" +define i64 @orr_64_orr_8() nounwind { +; CHECK-LABEL: orr_64_orr_8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, 
#-6148914691236517206 +; CHECK-NEXT: movk x0, #65450, lsl #32 +; CHECK-NEXT: movk x0, #45055, lsl #48 +; CHECK-NEXT: ret + ret i64 -5764607889538110806 +} + +;==--------------------------------------------------------------------------== +; Tests for 32-bit MOVN with 64-bit MOVK. +;==--------------------------------------------------------------------------== + +; NOTE(review): 77309411265 is 0x00000011FFFFFFC1, but "mov w0, #-65474" leaves +; 0xFFFF003E in the low word, so the checked sequence yields 0x00000011FFFF003E. +; The low word needs "mov w0, #-63"; the MOVN immediate looks un-inverted -- +; confirm against the expansion before trusting these CHECK lines. +define i64 @mvn32_pattern() nounwind { +; CHECK-LABEL: mvn32_pattern: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #43690 -; CHECK-NEXT: movk x0, #43690, lsl #16 +; CHECK-NEXT: mov w0, #-65474 +; CHECK-NEXT: movk x0, #17, lsl #32 +; CHECK-NEXT: ret + ret i64 77309411265 +} + +;==--------------------------------------------------------------------------== +; Tests for 32-bit ORR with 64-bit MOVK. +;==--------------------------------------------------------------------------== + +define i64 @orr32_movk() nounwind { +; CHECK-LABEL: orr32_movk: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #-1431655766 ; CHECK-NEXT: movk x0, #9, lsl #32 ; CHECK-NEXT: ret ret i64 41518017194 } -; FIXME: prefer "mov w0, #-13690; orr x0, x0, #0x1111111111111111" +;==--------------------------------------------------------------------------== +; Tests for 32-bit MOVN with 64-bit ORR. 
+;==--------------------------------------------------------------------------== + +; NOTE(review): 1229782942255887737 is 0x11111111FFFF3579, but "mov w0, #-13690" +; puts 0xFFFFCA86 in the low word, so the ORR result is 0x11111111FFFFDB97. +; The low word needs "mov w0, #-51847" -- confirm before trusting these CHECK lines. define i64 @movn_orr() nounwind { ; CHECK-LABEL: movn_orr: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-51847 -; CHECK-NEXT: movk x0, #4369, lsl #32 -; CHECK-NEXT: movk x0, #4369, lsl #48 +; CHECK-NEXT: mov w0, #-13690 +; CHECK-NEXT: orr x0, x0, #0x1111111111111111 ; CHECK-NEXT: ret ret i64 1229782942255887737 } -; FIXME: prefer "mov w0, #-305397761; eor x0, x0, #0x3333333333333333" +; NOTE(review): 4502503851687935 is 0x000FFF00DEADFFFF, but "mov w0, #559087615" +; puts 0x2152FFFF in the low word; it needs "mov w0, #-559022081" -- confirm. +define i64 @movn_orr_2() nounwind { +; CHECK-LABEL: movn_orr_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #559087615 +; CHECK-NEXT: orr x0, x0, #0xfff0000000000 +; CHECK-NEXT: ret + ret i64 4502503851687935 +} + +;==--------------------------------------------------------------------------== +; Tests for 32-bit MOVN with 64-bit EOR. 
+;==--------------------------------------------------------------------------== + define i64 @movn_eor() nounwind { ; CHECK-LABEL: movn_eor: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #3689348814741910323 -; CHECK-NEXT: movk x0, #52428 -; CHECK-NEXT: movk x0, #8455, lsl #16 +; CHECK-NEXT: mov w0, #305463295 +; CHECK-NEXT: eor x0, x0, #0x3333333333333333 ; CHECK-NEXT: ret ret i64 3689348814437076172 } -; FIXME: prefer "mov x0, #536866816; orr x0, x0, #0x3fff800000000000" +define i64 @movn_eor_2() nounwind { +; CHECK-LABEL: movn_eor_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #305463295 +; CHECK-NEXT: eor x0, x0, #0xff000000000000ff +; CHECK-NEXT: ret + ret i64 -72057593732464896 +} + +define i64 @movn_eor_3() nounwind { +; CHECK-LABEL: movn_eor_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #-60876 +; CHECK-NEXT: eor x0, x0, #0xffff000000 +; CHECK-NEXT: ret + ret i64 1095233376820 +} + +;==--------------------------------------------------------------------------== +; Tests for two 64-bit ORRs. 
+;==--------------------------------------------------------------------------== + define i64 @orr_orr_64() nounwind { ; CHECK-LABEL: orr_orr_64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #4611545280939032576 -; CHECK-NEXT: movk x0, #61440 -; CHECK-NEXT: movk x0, #8191, lsl #16 +; CHECK-NEXT: mov x0, #2147483646 +; CHECK-NEXT: orr x0, x0, #0x7fffe000000000 +; CHECK-NEXT: ret + ret i64 36028661727494142 +} + +define i64 @orr_orr_64_2() nounwind { +; CHECK-LABEL: orr_orr_64_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #536866816 +; CHECK-NEXT: orr x0, x0, #0x3fff800000000000 ; CHECK-NEXT: ret ret i64 4611545281475899392 } -; FIXME: prefer "mov x0, #558551907040256; orr x0, x0, #0x1000100010001000" define i64 @orr_orr_32() nounwind { ; CHECK-LABEL: orr_orr_32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-287953294993589248 -; CHECK-NEXT: movk x0, #7169, lsl #16 -; CHECK-NEXT: movk x0, #7169, lsl #48 +; CHECK-NEXT: mov x0, #558551907040256 +; CHECK-NEXT: orr x0, x0, #0x1c0000001c000000 ; CHECK-NEXT: ret ret i64 2018171185438784512 } -; FIXME: prefer "mov x0, #281479271743489; orr x0, x0, #0x1000100010001000" define i64 @orr_orr_16() nounwind { ; CHECK-LABEL: orr_orr_16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #4097 -; CHECK-NEXT: movk x0, #4097, lsl #16 -; CHECK-NEXT: movk x0, #4097, lsl #32 -; CHECK-NEXT: movk x0, #4097, lsl #48 +; CHECK-NEXT: mov x0, #281479271743489 +; CHECK-NEXT: orr x0, x0, #0x1000100010001000 ; CHECK-NEXT: ret ret i64 1153220576333074433 } -; FIXME: prefer "mov x0, #144680345676153346; orr x0, x0, #0x1818181818181818" define i64 @orr_orr_8() nounwind { ; CHECK-LABEL: orr_orr_8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #6682 -; CHECK-NEXT: movk x0, #6682, lsl #16 -; CHECK-NEXT: movk x0, #6682, lsl #32 -; CHECK-NEXT: movk x0, #6682, lsl #48 +; CHECK-NEXT: mov x0, #144680345676153346 +; CHECK-NEXT: orr x0, x0, #0x1818181818181818 ; CHECK-NEXT: ret ret i64 1880844493789993498 } -; FIXME: prefer "mov x0, #-6148914691236517206; orr x0, x0, 
#0x0FFFFF0000000000" -define i64 @orr_64_orr_8() nounwind { -; CHECK-LABEL: orr_64_orr_8: +;==--------------------------------------------------------------------------== +; Tests for 64-bit ORR followed by ADD/SUB +;==--------------------------------------------------------------------------== +; ORR followed by ADD +; 0xFFFEFFFF00FEEFFE == 0xFFFEFFFEFFFEFFFE + 0x0000000000FFF000 +define i64 @orr_add_1() nounwind { +; CHECK-LABEL: orr_add_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4098 +; CHECK-NEXT: movk x0, #254, lsl #16 +; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: ret + ret i64 -281479254970370 +} + +; ORR followed by ADD, variant 2 +; 0x000000FFFF123000 == 0x000000FFFF000000 + 0x0000000000123000 +define i64 @orr_add_2() nounwind { +; CHECK-LABEL: orr_add_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1099494850560 +; CHECK-NEXT: add x0, x0, #291, lsl #12 // =1191936 +; CHECK-NEXT: ret + ret i64 1099496042496 +} + +; ORR followed by ADD, variant 3 +; 0xFFFEFFFEFFFF0FFD = 0xFFFEFFFEFFFEFFFE + 0x0000000000000FFF +define i64 @orr_add_3() nounwind { +; CHECK-LABEL: orr_add_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-61443 +; CHECK-NEXT: movk x0, #65534, lsl #32 +; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: ret + ret i64 -281479271739395 +} + +; ORR followed by ADD, variant 4 +; 0xAAAAAAAAAABCDAAA = 0xAAAAAAAAAAAAAAAA + 0x0000000000123000 +define i64 @orr_add_4() nounwind { +; CHECK-LABEL: orr_add_4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-6148914691236517206 +; CHECK-NEXT: add x0, x0, #291, lsl #12 // =1191936 +; CHECK-NEXT: ret + ret i64 -6148914691235325270 +} + +; ORR followed by sub +; 0xAAAAAAAAAA987AAA = 0xAAAAAAAAAAAAAAAA - 0x123000 +define i64 @orr_sub_1() nounwind { +; CHECK-LABEL: orr_sub_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-6148914691236517206 +; CHECK-NEXT: sub x0, x0, #291, lsl #12 // =1191936 +; CHECK-NEXT: ret + ret i64 -6148914691237709142 +} + 
+;==--------------------------------------------------------------------------== +; Misc immediates which could be materialized in two instructions, but not +; implemented yet. +;==--------------------------------------------------------------------------== + +; ORR followed by ORR (0xAFFFFFAAAAAAAAAA) +define i64 @todo_1() nounwind { +; CHECK-LABEL: todo_1: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-6148914691236517206 ; CHECK-NEXT: movk x0, #65450, lsl #32 @@ -451,3 +553,200 @@ ; CHECK-NEXT: ret ret i64 -5764607889538110806 } + +; ORR followed by AND (0x0AAAA0000AAAA000) +define i64 @todo_2() nounwind { +; CHECK-LABEL: todo_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #40960 +; CHECK-NEXT: movk x0, #2730, lsl #16 +; CHECK-NEXT: movk x0, #40960, lsl #32 +; CHECK-NEXT: movk x0, #2730, lsl #48 +; CHECK-NEXT: ret + ret i64 768602608459489280 +} + +; ORR followed by EOR (0xAA55555AAAAAAAAA) +define i64 @todo_3() nounwind { +; CHECK-LABEL: todo_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-6148914691236517206 +; CHECK-NEXT: movk x0, #21850, lsl #32 +; CHECK-NEXT: movk x0, #43605, lsl #48 +; CHECK-NEXT: ret + ret i64 -6172933866342667606 +} + +; 32-bit ORR followed by 64-bit ORR (0x003FFFE0AAAAAAAA) +define i64 @todo_5() nounwind { +; CHECK-LABEL: todo_5: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-6148914691236517206 +; CHECK-NEXT: movk x0, #65504, lsl #32 +; CHECK-NEXT: movk x0, #63, lsl #48 +; CHECK-NEXT: ret + ret i64 18014263933840042 +} + +; 32-bit ORR followed by 64-bit EOR (0x000FFFFF11EEEEEE) +define i64 @todo_6() nounwind { +; CHECK-LABEL: todo_6: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4370 +; CHECK-NEXT: movk x0, #4590, lsl #16 +; CHECK-NEXT: movk x0, #15, lsl #48 +; CHECK-NEXT: ret + ret i64 4503595633274606 +} + +; 32-bit MOVN followed by 64-bit ROR (0xFF000000001234FF) +define i64 @todo_7() nounwind { +; CHECK-LABEL: todo_7: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #13567 +; CHECK-NEXT: movk x0, #18, lsl #16 +; CHECK-NEXT: movk x0, #65280, lsl 
#48 +; CHECK-NEXT: ret + ret i64 -72057594036734721 +} + +; 32-bit ORR followed by SBFM (0xFFFAAAAAAA000000) +define i64 @todo_8() nounwind { +; CHECK-LABEL: todo_8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #2852126720 +; CHECK-NEXT: movk x0, #43690, lsl #32 +; CHECK-NEXT: movk x0, #65530, lsl #48 +; CHECK-NEXT: ret + ret i64 -1501199886974976 +} + +; ORR imm + ORR shifted register +; 0x233333331 = 0x11111111 | (0x11111111 << 5) +define i64 @todo_9() nounwind { +; CHECK-LABEL: todo_9: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #13105 +; CHECK-NEXT: movk x0, #13107, lsl #16 +; CHECK-NEXT: movk x0, #2, lsl #32 +; CHECK-NEXT: ret + ret i64 9448928049 +} + +; ORR imm + EOR shifted register +; 0x1EEEEEEEF0 = 0xF0F0F0F0 ^ (0xF0F0F0F0 << 5) +define i64 @todo_10() nounwind { +; CHECK-LABEL: todo_10: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #61168 +; CHECK-NEXT: movk x0, #61166, lsl #16 +; CHECK-NEXT: movk x0, #30, lsl #32 +; CHECK-NEXT: ret + ret i64 132857655024 +} + +; MOVN + EOR shifted register +; 0x1F001D54B4 = 0xFFFF1234 ^ (0xFFFF1234 << 5) +define i64 @todo_11() nounwind { +; CHECK-LABEL: todo_11: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #21684 +; CHECK-NEXT: movk x0, #29, lsl #16 +; CHECK-NEXT: movk x0, #31, lsl #32 +; CHECK-NEXT: ret + ret i64 133145908404 +} + +; MOV + MADD +; 0x123456798765432 = 0x11111111 * 0x11111111 + 0x11111111 +define i64 @todo_12() nounwind { +; CHECK-LABEL: todo_12: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #21554 +; CHECK-NEXT: movk x0, #39030, lsl #16 +; CHECK-NEXT: movk x0, #17767, lsl #32 +; CHECK-NEXT: movk x0, #291, lsl #48 +; CHECK-NEXT: ret + ret i64 81985529464640562 +} + +; MOV+BFM: +; 0xF0F0F0F0F0787870 = (0xf0f0f0f0f0f0f0f0 & ~(0xFFFFFLL << 7)) | ((0xf0f0f0f0f0f0f0f0 & 0xFFFFFLL) << 7) +define i64 @todo_13() nounwind { +; CHECK-LABEL: todo_13: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-1085102592571150096 +; CHECK-NEXT: movk x0, #30832 +; CHECK-NEXT: movk x0, #61560, lsl #16 +; CHECK-NEXT: ret + ret i64 
-1085102592579045264 +} + +; MOVZ+ADD +define i64 @todo_14() nounwind { +; CHECK-LABEL: todo_14: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1311673391471656960 +; CHECK-NEXT: add x0, x0, #291, lsl #12 // =1191936 +; CHECK-NEXT: ret + ret i64 1311673391472848896 +} + +; MOVN+ADD +; 0xFFFF123500000122 = 0xFFFF1234FFFFFFFF + 0x0000000000000123 +define i64 @todo_15() nounwind { +; CHECK-LABEL: todo_15: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #290 +; CHECK-NEXT: movk x0, #4661, lsl #32 +; CHECK-NEXT: movk x0, #65535, lsl #48 +; CHECK-NEXT: ret + ret i64 -261456134143710 +} + +; MOVN+ADD +; 0xFFFF123500122FFF = 0xFFFF1234FFFFFFFF + 0x0000000000123000 +define i64 @todo_15a() nounwind { +; CHECK-LABEL: todo_15a: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-53249 +; CHECK-NEXT: movk x0, #18, lsl #16 +; CHECK-NEXT: movk x0, #4661, lsl #32 +; CHECK-NEXT: ret + ret i64 -261456132952065 +} + +; MOVN+ADD +; 0x1235000000122FFF = 0x1234FFFFFFFFFFFF + 0x0000000000123000 +define i64 @todo_15b() nounwind { +; CHECK-LABEL: todo_15b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #12287 +; CHECK-NEXT: movk x0, #18, lsl #16 +; CHECK-NEXT: movk x0, #4661, lsl #48 +; CHECK-NEXT: ret + ret i64 1311954866449559551 +} + +; MOVN+SUB +; 0x1234FFFFFFEDCFFF = 0x1234FFFFFFFFFFFF - 0x0000000000123000 +define i64 @todo_16() nounwind { +; CHECK-LABEL: todo_16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1311954866448367615 +; CHECK-NEXT: sub x0, x0, #291, lsl #12 // =1191936 +; CHECK-NEXT: ret + ret i64 1311954866447175679 +} + +; MOVZ+SUB +; 0x1233FFFFFFEDD000 = 0x1234000000000000 - 0x0000000000123000 +define i64 @todo_17() nounwind { +; CHECK-LABEL: todo_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-12288 +; CHECK-NEXT: movk x0, #65517, lsl #16 +; CHECK-NEXT: movk x0, #4659, lsl #48 +; CHECK-NEXT: ret + ret i64 1311673391470465024 +}