Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -103,22 +103,6 @@ } } -/// Helper function which extracts the specified 16-bit chunk from a -/// 64-bit value. -static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - - return (Imm >> (ChunkIdx * 16)) & 0xFFFF; -} - -/// Check whether the given 16-bit chunk replicated to full 64-bit width -/// can be materialized with an ORR instruction. -static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { - Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - - return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); -} - /// Check for identical 16-bit chunks within the constant and if so /// materialize them with a single ORR instruction. The remaining one or two /// 16-bit chunks will be materialized with MOVK instructions. @@ -130,121 +114,70 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII) { - using CountMap = DenseMap; - - CountMap Counts; - - // Scan the constant and count how often every chunk occurs. - for (unsigned Idx = 0; Idx < 4; ++Idx) - ++Counts[getChunk(UImm, Idx)]; - - // Traverse the chunks to find one which occurs more than once. - for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); - Chunk != End; ++Chunk) { - const uint64_t ChunkVal = Chunk->first; - const unsigned Count = Chunk->second; - - uint64_t Encoding = 0; - - // We are looking for chunks which have two or three instances and can be - // materialized with an ORR instruction. - if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) - continue; - - const bool CountThree = Count == 3; - // Create the ORR-immediate instruction. 
- MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) - .add(MI.getOperand(0)) - .addReg(AArch64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - unsigned ShiftAmt = 0; - uint64_t Imm16 = 0; - // Find the first chunk not materialized with the ORR instruction. - for (; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } + AArch64_AM::ReplicableChunk RC = AArch64_AM::replicableWithChunks(UImm); + if (!RC.hasValue()) + return false; + const unsigned Count = std::get<0>(RC.getValue()); + const uint64_t ChunkVal = std::get<1>(RC.getValue()); + const uint64_t Encoding = std::get<2>(RC.getValue()); - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && CountThree)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); + const bool CountThree = Count == 3; + // Create the ORR-immediate instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) + .add(MI.getOperand(0)) + .addReg(AArch64::XZR) + .addImm(Encoding); - // In case we have three instances the whole constant is now materialized - // and we can exit. - if (CountThree) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } + const unsigned DstReg = MI.getOperand(0).getReg(); + const bool DstIsDead = MI.getOperand(0).isDead(); - // Find the remaining chunk which needs to be materialized. - for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; + unsigned ShiftAmt = 0; + uint64_t Imm16 = 0; + // Find the first chunk not materialized with the ORR instruction. 
+ for (; ShiftAmt < 64; ShiftAmt += 16) { + Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - if (Imm16 != ChunkVal) - break; - } + if (Imm16 != ChunkVal) + break; + } - // Create the second MOVK instruction. - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); + // Create the first MOVK instruction. + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) + .addReg(DstReg, + RegState::Define | getDeadRegState(DstIsDead && CountThree)) + .addReg(DstReg) + .addImm(Imm16) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); - transferImpOps(MI, MIB, MIB2); + // In case we have three instances the whole constant is now materialized + // and we can exit. + if (CountThree) { + transferImpOps(MI, MIB, MIB1); MI.eraseFromParent(); return true; } - return false; -} + // Find the remaining chunk which needs to be materialized. + for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { + Imm16 = (UImm >> ShiftAmt) & 0xFFFF; -/// Check whether this chunk matches the pattern '1...0...'. This pattern -/// starts a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isStartChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == std::numeric_limits::max()) - return false; - - return isMask_64(~Chunk); -} - -/// Check whether this chunk matches the pattern '0...1...' This pattern -/// ends a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isEndChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == std::numeric_limits::max()) - return false; - - return isMask_64(Chunk); -} - -/// Clear or set all bits in the chunk at the given index. 
-static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { - const uint64_t Mask = 0xFFFF; + if (Imm16 != ChunkVal) + break; + } - if (Clear) - // Clear chunk in the immediate. - Imm &= ~(Mask << (Idx * 16)); - else - // Set all bits in the immediate for the particular chunk. - Imm |= Mask << (Idx * 16); + // Create the second MOVK instruction. + MachineInstrBuilder MIB2 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(Imm16) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); - return Imm; + transferImpOps(MI, MIB, MIB2); + MI.eraseFromParent(); + return true; } /// Check whether the constant contains a sequence of contiguous ones, @@ -264,73 +197,13 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII) { - const int NotSet = -1; - const uint64_t Mask = 0xFFFF; - - int StartIdx = NotSet; - int EndIdx = NotSet; - // Try to find the chunks which start/end a contiguous sequence of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - int64_t Chunk = getChunk(UImm, Idx); - // Sign extend the 16-bit chunk to 64-bit. - Chunk = (Chunk << 48) >> 48; - - if (isStartChunk(Chunk)) - StartIdx = Idx; - else if (isEndChunk(Chunk)) - EndIdx = Idx; - } - - // Early exit in case we can't find a start/end chunk. - if (StartIdx == NotSet || EndIdx == NotSet) + AArch64_AM::SequenceOneIdx SOI = AArch64_AM::sequenceOfOnes(UImm); + if (!SOI.hasValue()) return false; - - // Outside of the contiguous sequence of ones everything needs to be zero. - uint64_t Outside = 0; - // Chunks between the start and end chunk need to have all their bits set. - uint64_t Inside = Mask; - - // If our contiguous sequence of ones wraps around from the MSB into the LSB, - // just swap indices and pretend we are materializing a contiguous sequence - // of zeros surrounded by a contiguous sequence of ones. 
- if (StartIdx > EndIdx) { - std::swap(StartIdx, EndIdx); - std::swap(Outside, Inside); - } - - uint64_t OrrImm = UImm; - int FirstMovkIdx = NotSet; - int SecondMovkIdx = NotSet; - - // Find out which chunks we need to patch up to obtain a contiguous sequence - // of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - const uint64_t Chunk = getChunk(UImm, Idx); - - // Check whether we are looking at a chunk which is not part of the - // contiguous sequence of ones. - if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) { - OrrImm = updateImm(OrrImm, Idx, Outside == 0); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - - // Check whether we are looking a chunk which is part of the contiguous - // sequence of ones. - } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) { - OrrImm = updateImm(OrrImm, Idx, Inside != Mask); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - } - } - assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!"); + const uint64_t OrrImm = std::get<0>(SOI.getValue()); + const int FirstMovkIdx = std::get<1>(SOI.getValue()); + const int SecondMovkIdx = std::get<2>(SOI.getValue()); + const int NotSet = -1; // Create the ORR-immediate instruction. 
uint64_t Encoding = 0; @@ -351,7 +224,7 @@ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) .addReg(DstReg) - .addImm(getChunk(UImm, FirstMovkIdx)) + .addImm(AArch64_AM::getChunk(UImm, FirstMovkIdx)) .addImm( AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16)); @@ -367,7 +240,7 @@ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) - .addImm(getChunk(UImm, SecondMovkIdx)) + .addImm(AArch64_AM::getChunk(UImm, SecondMovkIdx)) .addImm( AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16)); @@ -459,7 +332,7 @@ .addImm(Encoding); // Create the MOVK instruction. - const unsigned Imm16 = getChunk(UImm, Shift / 16); + const unsigned Imm16 = AArch64_AM::getChunk(UImm, Shift / 16); const unsigned DstReg = MI.getOperand(0).getReg(); const bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5394,15 +5394,25 @@ IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f16 && Subtarget->hasFullFP16()) IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero(); - // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to + // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to // generate that fmov. // If we can not materialize in immediate field for fmov, check if the // value can be encoded as the immediate operand of a logical instruction. // The immediate value will be created with either MOVZ, MOVN, or ORR. - if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) - IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(), - VT.getSizeInBits()); + // The cost is actually exactly the same for mov+fmov vs. 
adrp+ldr; however
+  // the mov+fmov sequence is always better because of the reduced cache
+  // pressure. The timings are still the same if you consider movw+movk+fmov
+  // vs. adrp+ldr (it's one instruction longer, but the movw+movk is fused).
+  // So we limit up to 2 instructions at most.
+  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
+    // For f64 it is not obviously worthwhile to emit a five-instruction
+    // sequence vs. a two-instruction constant-pool load. So we limit to a
+    // maximum of 2 moves to match the adrp+ldr cost.
+    int NumInst = AArch64_AM::getExpandImmCost(ImmInt.getZExtValue(),
+                                               VT.getSizeInBits());
+    IsLegal = NumInst <= (forCodeSize ? 1 : 2);
+  }

   LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
                     << " imm value: "; Imm.dump(););
Index: lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -16,8 +16,11 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/bit.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <tuple>
 #include <cassert>

 namespace llvm {
@@ -841,6 +843,245 @@
   return isAnyMOVZMovAlias(Value, RegWidth);
 }

+/// Helper function which extracts the specified 16-bit chunk from a
+/// 64-bit value.
+inline static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
+  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
+
+  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
+}
+
+/// Check whether the given 16-bit chunk replicated to full 64-bit width
+/// can be materialized with an ORR instruction.
+inline static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
+  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
+
+  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+}
+
+/// Check whether this chunk matches the pattern '1...0...'. This pattern
+/// starts a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+inline static bool isStartChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(~Chunk);
+}
+
+/// Check whether this chunk matches the pattern '0...1...' This pattern
+/// ends a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+inline static bool isEndChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(Chunk);
+}
+
+typedef Optional<std::tuple<unsigned, uint64_t, uint64_t>> ReplicableChunk;
+
+// Check for identical 16-bit chunks within the constant that can be
+// materialized with a single ORR instruction plus MOVK for the remaining
+// chunks.
+inline static ReplicableChunk replicableWithChunks(uint64_t UImm) {
+  using CountMap = DenseMap<uint64_t, unsigned>;
+
+  CountMap Counts;
+
+  // Scan the constant and count how often every chunk occurs.
+  for (unsigned Idx = 0; Idx < 4; ++Idx)
+    ++Counts[getChunk(UImm, Idx)];
+
+  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
+       Chunk != End; ++Chunk) {
+    const uint64_t ChunkVal = Chunk->first;
+    const unsigned Count = Chunk->second;
+
+    uint64_t Encoding = 0;
+
+    // We are looking for chunks which have two or three instances and can be
+    // materialized with an ORR instruction.
+    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
+      continue;
+
+    return std::make_tuple(Count, ChunkVal, Encoding);
+  }
+
+  return ReplicableChunk();
+}
+
+inline static bool isReplicableWithChunks(uint64_t UImm, unsigned &Count) {
+  AArch64_AM::ReplicableChunk RC = AArch64_AM::replicableWithChunks(UImm);
+  if (!RC.hasValue())
+    return false;
+  // With three identical chunks the ORR covers them all and one MOVK finishes
+  // the constant (2 instructions); otherwise two MOVKs follow the ORR (3).
+  Count = std::get<0>(RC.getValue()) == 3 ? 2 : 3;
+  return true;
+}
+
+
+typedef Optional<std::tuple<uint64_t, int, int>> SequenceOneIdx;
+
+/// Clear or set all bits in the chunk at the given index.
+static inline uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
+  const uint64_t Mask = 0xFFFF;
+
+  if (Clear)
+    // Clear chunk in the immediate.
+    Imm &= ~(Mask << (Idx * 16));
+  else
+    // Set all bits in the immediate for the particular chunk.
+    Imm |= Mask << (Idx * 16);
+
+  return Imm;
+}
+
+// Check if the constant contains a sequence of contiguous ones, which might
+// be interrupted by one or two chunks that can be materialized with an
+// ORR instruction plus MOVK instructions.
+inline static SequenceOneIdx sequenceOfOnes(uint64_t UImm) {
+  const int NotSet = -1;
+  const uint64_t Mask = 0xFFFF;
+
+  int StartIdx = NotSet;
+  int EndIdx = NotSet;
+
+  // Try to find the chunks which start/end a contiguous sequence of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    int64_t Chunk = getChunk(UImm, Idx);
+    // Sign extend the 16-bit chunk to 64-bit.
+    Chunk = (Chunk << 48) >> 48;
+    if (isStartChunk(Chunk))
+      StartIdx = Idx;
+    else if (isEndChunk(Chunk))
+      EndIdx = Idx;
+  }
+
+  // Early exit in case we can't find a start/end chunk.
+  if (StartIdx == NotSet || EndIdx == NotSet)
+    return SequenceOneIdx();
+
+  // Outside of the contiguous sequence of ones everything needs to be zero.
+  uint64_t Outside = 0;
+  // Chunks between the start and end chunk need to have all their bits set.
+  uint64_t Inside = Mask;
+
+  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
+  // just swap indices and pretend we are materializing a contiguous sequence
+  // of zeros surrounded by a contiguous sequence of ones.
+  if (StartIdx > EndIdx) {
+    std::swap(StartIdx, EndIdx);
+    std::swap(Outside, Inside);
+  }
+
+  uint64_t OrrImm = UImm;
+  int FirstMovkIdx = NotSet;
+  int SecondMovkIdx = NotSet;
+
+  // Find out which chunks we need to patch up to obtain a contiguous sequence
+  // of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    const uint64_t Chunk = AArch64_AM::getChunk(UImm, Idx);
+
+    // Check whether we are looking at a chunk which is not part of the
+    // contiguous sequence of ones.
+    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
+      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+
+      // Check whether we are looking at a chunk which is part of the
+      // contiguous sequence of ones.
+    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
+      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+    }
+  }
+  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
+
+  return std::make_tuple(OrrImm, FirstMovkIdx, SecondMovkIdx);
+}
+
+
+static inline bool isSequenceOfOnes(uint64_t UImm, unsigned &Count) {
+  SequenceOneIdx SOI = sequenceOfOnes(UImm);
+  if (!SOI.hasValue())
+    return false;
+  const bool SingleMovk = std::get<2>(SOI.getValue()) == -1;
+  Count = SingleMovk ? 2 : 3;
+  return true;
+}
+
+// Return the number of instructions required to materialize the constant.
+// It follows the strategy used in AArch64ExpandPseudo::expandMOVImm.
+static inline int getExpandImmCost(uint64_t Imm, unsigned BitSize) { + const unsigned Mask = 0xFFFF; + uint64_t Encoding; + + if (Imm == 0) + return 0; + + // Single ORR. + uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); + if (processLogicalImmediate(UImm, BitSize, Encoding)) + return 1; + + // Scan the immediate and count the number of 16-bit chunks which are either + // all ones or all zeros. + unsigned OneChunks = 0; + unsigned ZeroChunks = 0; + for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { + const unsigned Chunk = (Imm >> Shift) & Mask; + if (Chunk == Mask) + OneChunks++; + else if (Chunk == 0) + ZeroChunks++; + } + + // MOVZ/MOVN followed by MOVK. + if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) + return 2; + + // All 32-bit immediates can be expanded with a MOVZ/MOVK pair. + + // 64-bit ORR followed by MOVK. + for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { + uint64_t ShiftedMask = (0xFFFFULL << Shift); + uint64_t ZeroChunk = UImm & ~ShiftedMask; + uint64_t OneChunk = UImm | ShiftedMask; + uint64_t RotatedImm = (UImm << 32) | (UImm >> 32); + uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask); + if (processLogicalImmediate(ZeroChunk, BitSize, Encoding) || + processLogicalImmediate(OneChunk, BitSize, Encoding) || + processLogicalImmediate(ReplicateChunk, BitSize, Encoding)) { + return 2; + } + } + + // MOVZ/MOVN followed by two MOVK; + if (OneChunks || ZeroChunks) + return 3; + + unsigned Count; + if (BitSize == 64 && isReplicableWithChunks(UImm, Count)) + return Count; + if (BitSize == 64 && isSequenceOfOnes(UImm, Count)) + return Count; + + return 4; +} + } // end namespace AArch64_AM } // end namespace llvm Index: test/CodeGen/AArch64/arm64-fp-imm.ll =================================================================== --- test/CodeGen/AArch64/arm64-fp-imm.ll +++ test/CodeGen/AArch64/arm64-fp-imm.ll @@ -10,12 +10,11 @@ ret double 0x400921FB54442D18 } -; CHECK: literal4 -; CHECK: .long 
1078530011
 define float @bar() {
 ; CHECK: _bar:
-; CHECK:  adrp x[[REG:[0-9]+]], lCPI1_0@PAGE
-; CHECK:  ldr s0, [x[[REG]], lCPI1_0@PAGEOFF]
+; CHECK:  mov [[REG:w[0-9]+]], #4059
+; CHECK:  movk [[REG]], #16457, lsl #16
+; CHECK:  fmov s0, [[REG]]
 ; CHECK-NEXT:  ret
   ret float 0x400921FB60000000
 }
Index: test/CodeGen/AArch64/fpimm.ll
===================================================================
--- test/CodeGen/AArch64/fpimm.ll
+++ test/CodeGen/AArch64/fpimm.ll
@@ -45,6 +45,13 @@
 ; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
 ; TINY-DAG: fmov {{d[0-9]+}}, [[X128]]

+; 64-bit ORR followed by MOVK.
+; CHECK-DAG: mov [[XFP0:x[0-9]+]], #1082331758844
+; CHECK-DAG: movk [[XFP0]], #64764, lsl #16
+; CHECK-DAG: fmov {{d[0-9]+}}, [[XFP0]]
+  %newval3 = fadd double %val, 0xFCFCFC00FC
+  store volatile double %newval3, double* @varf64
+
 ; CHECK: ret
 ; TINY: ret
   ret void
@@ -54,8 +61,9 @@
 ; LARGE: mov [[REG:w[0-9]+]], #4059
 ; LARGE-NEXT: movk [[REG]], #16457, lsl #16
 ; LARGE-NEXT: fmov s0, [[REG]]
-; TINY-LABEL: check_float2
-; TINY: ldr s0, .LCPI2_0
+; TINY-LABEL: check_float2
+; TINY: mov [[REG:w[0-9]+]], #4059
+; TINY-NEXT: movk [[REG]], #16457, lsl #16
 define float @check_float2() {
   ret float 3.14159274101257324218750
 }
Index: test/CodeGen/AArch64/literal_pools_float.ll
===================================================================
--- test/CodeGen/AArch64/literal_pools_float.ll
+++ test/CodeGen/AArch64/literal_pools_float.ll
@@ -31,16 +31,19 @@
   %doubleval = load double, double* @vardouble
   %newdouble = fadd double %doubleval, 129.0

-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
-; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
+; CHECK: mov [[W129:x[0-9]+]], #35184372088832
+; CHECK: movk [[W129]], #16480, lsl #48
+; CHECK: fmov {{d[0-9]+}}, [[W129]]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 ; CHECK-NOFP-NOT: fadd

-; CHECK-TINY: ldr [[LIT129:d[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK-TINY: mov [[W129:x[0-9]+]],
#35184372088832 +; CHECK-TINY: movk [[W129]], #16480, lsl #48 +; CHECK-TINY: fmov {{d[0-9]+}}, [[W129]] ; CHECK-NOFP-TINY-NOT: ldr {{d[0-9]+}}, ; CHECK-NOFP-TINY-NOT: fadd -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]] +; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:vardouble]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]] Index: test/CodeGen/AArch64/win_cst_pool.ll =================================================================== --- test/CodeGen/AArch64/win_cst_pool.ll +++ test/CodeGen/AArch64/win_cst_pool.ll @@ -2,22 +2,22 @@ ; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s define double @double() { - ret double 0x0000000000800001 + ret double 0x2000000000800001 } -; CHECK: .globl __real@0000000000800001 -; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001 +; CHECK: .globl __real@2000000000800001 +; CHECK-NEXT: .section .rdata,"dr",discard,__real@2000000000800001 ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: __real@0000000000800001: -; CHECK-NEXT: .xword 8388609 +; CHECK-NEXT: __real@2000000000800001: +; CHECK-NEXT: .xword 2305843009222082561 ; CHECK: double: -; CHECK: adrp x8, __real@0000000000800001 -; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001] +; CHECK: adrp x8, __real@2000000000800001 +; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001] ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" ; MINGW-NEXT: .p2align 3 ; MINGW-NEXT: [[LABEL:\.LC.*]]: -; MINGW-NEXT: .xword 8388609 +; MINGW-NEXT: .xword 2305843009222082561 ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] ; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]