diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,92 @@
   return true;
 }
 
+// Return a mask of the contiguous run of set bits in V whose lowest bit is at
+// StartPosition. StartPosition must be less than 64.
+static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
+  uint64_t NumOnes = llvm::countTrailingOnes(V >> StartPosition);
+
+  uint64_t UnshiftedOnes;
+  if (NumOnes == 64) {
+    // Shifting by the full width is undefined, so special-case the full mask.
+    UnshiftedOnes = ~0ULL;
+  } else {
+    UnshiftedOnes = (1ULL << NumOnes) - 1;
+  }
+  return UnshiftedOnes << StartPosition;
+}
+
+// Rotate n left by d bits (modulo 64). Both shift amounts are masked so that
+// d == 0 and d == 64 never shift by the full width, which is undefined.
+static uint64_t rotl(uint64_t n, uint64_t d) {
+  return (n << (d & 63)) | (n >> ((64 - d) & 63));
+}
+
+// Rotate n right by d bits (modulo 64); see rotl for the masking rationale.
+static uint64_t rotr(uint64_t n, uint64_t d) {
+  return (n >> (d & 63)) | (n << ((64 - d) & 63));
+}
+
+// Grow Subset by OR-ing in rotated copies of itself for as long as the result
+// stays within the bits set in V, and return the largest such mask.
+static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
+  uint64_t Result = Subset;
+
+  // 64, 32, 16, 8, 4, 2
+  for (uint64_t i = 0; i < 6; ++i) {
+    uint64_t Rotation = 1ULL << (6 - i);
+    uint64_t Closure = Result | rotl(Result, Rotation);
+    if (Closure != (Closure & V)) {
+      break;
+    }
+    Result = Closure;
+  }
+
+  return Result;
+}
+
+// Attempt to expand an immediate as a pair of ORRs of logical immediates.
+// On success, appends two ORRXri entries to Insn (tagged 0 and 1 in their
+// second field) and returns true; otherwise returns false with Insn unchanged.
+static bool tryOrrOfLogicalImmediates(uint64_t UImm,
+                                      SmallVectorImpl<ImmInsnModel> &Insn) {
+  if (UImm == 0 || ~UImm == 0) return false;
+
+  // Rotate the trailing ones away so that a run of ones wrapping around the
+  // top/bottom of the value becomes contiguous and bit 0 is clear.
+  uint32_t InitialTrailingOnes = llvm::countTrailingOnes(UImm);
+  uint64_t RemainingBits = rotr(UImm, InitialTrailingOnes);
+
+  // Find the first set bit.
+  uint32_t Position = llvm::countTrailingZeros(RemainingBits);
+
+  // Get the first run of set bits.
+  uint64_t FirstRun = GetRunOfOnesStartingAt(RemainingBits, Position);
+
+  // Replicate the run as many times as possible, as long as the bits are set in
+  // RemainingBits.
+  uint64_t MaximalImm = MaximallyReplicateSubImmediate(RemainingBits, FirstRun);
+
+  // Remove all bits that are set by this mask.
+  RemainingBits &= ~MaximalImm;
+  if (!RemainingBits) return false;
+
+  uint64_t Encoding1, Encoding2;
+  bool Imm1Success = AArch64_AM::processLogicalImmediate(
+      rotl(MaximalImm, InitialTrailingOnes), 64, Encoding1);
+  bool Imm2Success = AArch64_AM::processLogicalImmediate(
+      rotl(RemainingBits, InitialTrailingOnes), 64, Encoding2);
+
+  if (Imm1Success && Imm2Success) {
+    // Create the ORR-immediate instructions.
+    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
+    Insn.push_back({AArch64::ORRXri, 1, Encoding2});
+    return true;
+  }
+
+  return false;
+}
+
 /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
 /// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
 static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
@@ -372,6 +458,9 @@
     }
   }
 
+  // Attempt to use a sequence of two ORR-immediate instructions.
+  if (tryOrrOfLogicalImmediates(Imm, Insn)) return;
+
   // FIXME: Add more two-instruction sequences.
 
   // Three instruction sequences.
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -148,10 +148,22 @@
 
     case AArch64::ORRWri:
     case AArch64::ORRXri:
-      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
-        .add(MI.getOperand(0))
-        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
-        .addImm(I->Op2));
+      if (I->Op1 == 0) {
+        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+          .add(MI.getOperand(0))
+          .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+          .addImm(I->Op2));
+      } else {
+        Register DstReg = MI.getOperand(0).getReg();
+        bool DstIsDead = MI.getOperand(0).isDead();
+        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+          .addReg(DstReg,
+                  RegState::Define |
+                  getDeadRegState(DstIsDead && LastItem) |
+                  RenamableState)
+          .addReg(DstReg)
+          .addImm(I->Op2));
+      }
       break;
     case AArch64::MOVNWi:
     case AArch64::MOVNXi:
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -326,9 +326,8 @@
 define i64 @orr_movk16() nounwind {
 ; CHECK-LABEL: orr_movk16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #36028659580010496
-; CHECK-NEXT:    movk x0, #65534
-; CHECK-NEXT:    movk x0, #32767, lsl #16
+; CHECK-NEXT:    mov x0, #2147483646
+; CHECK-NEXT:    orr x0, x0, #0x7fffe000000000
 ; CHECK-NEXT:    ret
   ret i64 36028661727494142
 }
@@ -399,9 +398,8 @@
 define i64 @orr_orr_64() nounwind {
 ; CHECK-LABEL: orr_orr_64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #4611545280939032576
-; CHECK-NEXT:    movk x0, #61440
-; CHECK-NEXT:    movk x0, #8191, lsl #16
+; CHECK-NEXT:    mov x0, #536866816
+; CHECK-NEXT:    orr x0, x0, #0x3fff800000000000
 ; CHECK-NEXT:    ret
   ret i64 4611545281475899392
 }