Index: llvm/include/llvm/CodeGen/CallingConvLower.h =================================================================== --- llvm/include/llvm/CodeGen/CallingConvLower.h +++ llvm/include/llvm/CodeGen/CallingConvLower.h @@ -43,6 +43,7 @@ AExtUpper, // The value is in the upper bits of the location and should be // extended with undefined upper bits when retrieved. BCvt, // The value is bit-converted in the location. + Trunc, // The value is truncated in the location. VExt, // The value is vector-widened in the location. // FIXME: Not implemented yet. Code that uses AExt to mean // vector-widen should be fixed to use VExt instead. Index: llvm/include/llvm/Target/TargetCallingConv.td =================================================================== --- llvm/include/llvm/Target/TargetCallingConv.td +++ llvm/include/llvm/Target/TargetCallingConv.td @@ -152,6 +152,12 @@ ValueType DestTy = destTy; } +/// CCTruncToType - If applied, this truncates the specified current value to +/// the specified type. +class CCTruncToType : CCAction { + ValueType DestTy = destTy; +} + /// CCPassIndirect - If applied, this stores the value to stack and passes the pointer /// as normal argument. class CCPassIndirect : CCAction { Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9700,6 +9700,10 @@ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast(Res.getOperand(1))->getReg(); Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -139,6 +139,7 @@ setLibcallName(RTLIB::BZERO, "__bzero"); break; case Triple::aarch64: + case Triple::aarch64_32: setLibcallName(RTLIB::BZERO, "bzero"); break; default: Index: llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -154,6 +154,7 @@ break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::aarch64_32: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. Index: llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp =================================================================== --- llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -119,7 +119,8 @@ return make_error( std::string("No callback manager available for ") + T.str(), inconvertibleErrorCode()); - case Triple::aarch64: { + case Triple::aarch64: + case Triple::aarch64_32: { typedef orc::LocalJITCompileCallbackManager CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); } @@ -167,6 +168,7 @@ }; case Triple::aarch64: + case Triple::aarch64_32: return [](){ return llvm::make_unique< orc::LocalIndirectStubsManager>(); Index: llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp =================================================================== --- llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -94,6 +94,7 @@ inconvertibleErrorCode()); case Triple::aarch64: + case Triple::aarch64_32: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); Index: llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp =================================================================== --- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -909,7 +909,8 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, unsigned AbiVariant) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::aarch64_32) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. Index: llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp =================================================================== --- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -353,6 +353,7 @@ case Triple::arm: return make_unique(MemMgr, Resolver); case Triple::aarch64: + case Triple::aarch64_32: return make_unique(MemMgr, Resolver); case Triple::x86: return make_unique(MemMgr, Resolver); Index: llvm/lib/LTO/LTOCodeGenerator.cpp =================================================================== --- llvm/lib/LTO/LTOCodeGenerator.cpp +++ llvm/lib/LTO/LTOCodeGenerator.cpp @@ -358,7 +358,8 @@ MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) MCpu = "cyclone"; } Index: llvm/lib/LTO/LTOModule.cpp =================================================================== --- llvm/lib/LTO/LTOModule.cpp +++ llvm/lib/LTO/LTOModule.cpp @@ -219,7 +219,8 @@ CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) CPU = "cyclone"; } Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -489,7 +489,8 @@ TMBuilder.MCpu = "core2"; else if (TheTriple.getArch() == llvm::Triple::x86) TMBuilder.MCpu = "yonah"; - else if (TheTriple.getArch() == llvm::Triple::aarch64) + else if (TheTriple.getArch() == llvm::Triple::aarch64 || + TheTriple.getArch() == llvm::Triple::aarch64_32) TMBuilder.MCpu = "cyclone"; } TMBuilder.TheTriple = std::move(TheTriple); Index: llvm/lib/MC/MCObjectFileInfo.cpp =================================================================== --- llvm/lib/MC/MCObjectFileInfo.cpp +++ llvm/lib/MC/MCObjectFileInfo.cpp @@ -27,7 +27,7 @@ return false; // aarch64 always has it. - if (T.getArch() == Triple::aarch64) + if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) return true; // armv7k always has it. @@ -56,7 +56,8 @@ MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, SectionKind::getReadOnly()); - if (T.isOSDarwin() && T.getArch() == Triple::aarch64) + if (T.isOSDarwin() && + (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) SupportsCompactUnwindWithoutEHFrame = true; if (T.isWatchABI()) @@ -192,7 +193,7 @@ if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF - else if (T.getArch() == Triple::aarch64) + else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1132,4 +1132,6 @@ RegisterAsmPrinter X(getTheAArch64leTarget()); RegisterAsmPrinter Y(getTheAArch64beTarget()); RegisterAsmPrinter Z(getTheARM64Target()); + RegisterAsmPrinter W(getTheARM64_32Target()); + RegisterAsmPrinter V(getTheAArch64_32Target()); } Index: llvm/lib/Target/AArch64/AArch64CallLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -372,14 +372,16 @@ return false; if (F.isVarArg()) { - if (!MF.getSubtarget().isTargetDarwin()) { - // FIXME: we need to reimplement saveVarArgsRegisters from + auto &Subtarget = MF.getSubtarget(); + if (!Subtarget.isTargetDarwin()) { + // FIXME: we need to reimplement saveVarArgsRegisters from // AArch64ISelLowering. return false; } - // We currently pass all varargs at 8-byte alignment. - uint64_t StackOffset = alignTo(Handler.StackUsed, 8); + // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. + uint64_t StackOffset = + alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8); auto &MFI = MIRBuilder.getMF().getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); Index: llvm/lib/Target/AArch64/AArch64CallingConvention.h =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.h +++ llvm/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); +bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); Index: llvm/lib/Target/AArch64/AArch64CallingConvention.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.cpp +++ llvm/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -82,7 +82,7 @@ // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. ArrayRef RegList; - if (LocVT.SimpleTy == MVT::i64) + if (LocVT.SimpleTy == MVT::i64 || LocVT.SimpleTy == MVT::i32) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) RegList = HRegList; @@ -108,7 +108,7 @@ return true; unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); - if (RegResult) { + if (RegResult && LocVT.SimpleTy != MVT::i32) { for (auto &It : PendingMembers) { It.convertToReg(RegResult); State.addLoc(It); @@ -116,6 +116,19 @@ } PendingMembers.clear(); return true; + } else if (RegResult) { + bool UseHigh = false; + CCValAssign::LocInfo Info; + for (auto &It : PendingMembers) { + Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; + State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult, + MVT::i64, Info)); + UseHigh = !UseHigh; + if (!UseHigh) + ++RegResult; + } + PendingMembers.clear(); + return true; } // Mark all regs in the class as unavailable Index: llvm/lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -17,6 +17,10 @@ class CCIfBigEndian : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -95,6 +99,7 @@ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, + CCIfConsecutiveRegs>, CCIfSwiftError>>, // Big endian vectors must be passed as if they were 1-element vectors so that @@ -186,6 +191,12 @@ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], @@ -213,6 +224,29 @@ CCAssignToStack<16, 16>> ]>; +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + + // The WebKit_JS calling convention only passes the first argument (the callee) // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other Index: llvm/lib/Target/AArch64/AArch64CollectLOH.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -103,6 +103,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -181,6 +182,7 @@ case AArch64::ADDXri: return canAddBePartOfLOH(MI); case AArch64::LDRXui: + case AArch64::LDRWui: // Check immediate to see if the immediate is an address. switch (MI.getOperand(2).getType()) { default: @@ -312,7 +314,8 @@ Info.Type = MCLOH_AdrpAdd; Info.IsCandidate = true; Info.MI0 = &MI; - } else if (MI.getOpcode() == AArch64::LDRXui && + } else if ((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { Info.Type = MCLOH_AdrpLdrGot; Info.IsCandidate = true; @@ -357,7 +360,9 @@ return true; } } else { - assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && + "Expect LDRXui or LDRWui"); assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && "Expected GOT relocation"); if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { @@ -474,13 +479,23 @@ handleClobber(LOHInfos[Idx]); } // Handle uses. + + SmallSet UsesSeen; for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; int Idx = mapRegToGPRIndex(MO.getReg()); if (Idx < 0) continue; - handleUse(MI, MO, LOHInfos[Idx]); + + // Multiple uses of the same register within a single instruction don't + // count as MultiUser or block optimization. This is especially important on + // arm64_32, where any memory operation is likely to be an explicit use of + // xN and an implicit use of wN (the base address register). + if (!UsesSeen.count(Idx)) { + handleUse(MI, MO, LOHInfos[Idx]); + UsesSeen.insert(Idx); + } } } @@ -512,6 +527,7 @@ switch (Opcode) { case AArch64::ADDXri: case AArch64::LDRXui: + case AArch64::LDRWui: if (canDefBePartOfLOH(MI)) { const MachineOperand &Def = MI.getOperand(0); const MachineOperand &Op = MI.getOperand(1); Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -433,12 +433,26 @@ } } else { // Small codemodel expand into ADRP + LDR. + MachineFunction &MF = *MI.getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) - .add(MI.getOperand(0)) - .addReg(DstReg); + + MachineInstrBuilder MIB2; + if (MF.getSubtarget().isTargetILP32()) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); + unsigned DstFlags = MI.getOperand(0).getTargetFlags(); + MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, DstFlags | RegState::Implicit); + } else { + unsigned DstReg = MI.getOperand(0).getReg(); + MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) + .add(MI.getOperand(0)) + .addUse(DstReg, RegState::Kill); + } if (MO1.isGlobal()) { MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); Index: llvm/lib/Target/AArch64/AArch64FastISel.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -474,12 +474,32 @@ ADRPReg) .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); - ResultReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), + unsigned LdrOpc; + if (Subtarget->isTargetILP32()) { + ResultReg = createResultReg(&AArch64::GPR32RegClass); + LdrOpc = AArch64::LDRWui; + } else { + ResultReg = createResultReg(&AArch64::GPR64RegClass); + LdrOpc = AArch64::LDRXui; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC | OpFlags); + if (!Subtarget->isTargetILP32()) + return ResultReg; + + // LDRWui produces a 32-bit register, but pointers in-register are 64-bits + // so we must extend the result on ILP32. + unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(Result64) + .addImm(0) + .addReg(ResultReg, RegState::Kill) + .addImm(AArch64::sub_32); + return Result64; } else { // ADRP + ADDX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), @@ -504,6 +524,15 @@ if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); + // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, + // 'null' pointers need to have a somewhat special treatment. + if (const auto *CPN = dyn_cast(C)) { + (void)CPN; + assert(CPN->getType()->getPointerAddressSpace() == 0 && + "Unexpected address space"); + assert(VT == MVT::i64 && "Expected 64-bit pointers"); + return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); + } if (const auto *CI = dyn_cast(C)) return materializeInt(CI, VT); @@ -946,6 +975,9 @@ bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); + if (Subtarget->isTargetILP32() && Ty->isPointerTy()) + return false; + // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -988,6 +1020,9 @@ } bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { + if (Subtarget->isTargetILP32()) + return false; + unsigned ScaleFactor = getImplicitScaleFactor(VT); if (!ScaleFactor) return false; @@ -3165,6 +3200,11 @@ if (IsTailCall) return false; + // FIXME: we could and should support this, but for now correctness at -O0 is + // more important. + if (Subtarget->isTargetILP32()) + return false; + CodeModel::Model CM = TM.getCodeModel(); // Only support the small-addressing and large code models. if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) @@ -3788,6 +3828,11 @@ if (!FuncInfo.CanLowerReturn) return false; + // FIXME: in principle it could. Mostly just a case of zero extending outgoing + // pointers. + if (Subtarget->isTargetILP32()) + return false; + if (F.isVarArg()) return false; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -255,6 +255,10 @@ const SelectionDAG &DAG, unsigned Depth = 0) const override; + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { + return MVT::getIntegerVT(64); + } + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1019,6 +1019,14 @@ Known.One &= Known2.One; break; } + case AArch64ISD::LOADgot: + case AArch64ISD::ADDlow: { + if (!Subtarget->isTargetILP32()) + break; + // In ILP32 mode all valid pointers are in the low 4GB of the address-space. + Known.Zero = APInt::getHighBitsSet(64, 32); + break; + } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); @@ -3004,8 +3012,11 @@ return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; - return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; - case CallingConv::Win64: + if (!IsVarArg) + return CC_AArch64_DarwinPCS; + return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg + : CC_AArch64_DarwinPCS_VarArg; + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; case CallingConv::AArch64_VectorCall: return CC_AArch64_AAPCS; @@ -3028,6 +3039,7 @@ // Assign locations to all of the incoming arguments. SmallVector ArgLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); @@ -3084,11 +3096,10 @@ continue; } + SDValue ArgValue; if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; const TargetRegisterClass *RC; if (RegVT == MVT::i32) @@ -3124,14 +3135,13 @@ case CCValAssign::AExt: case CCValAssign::SExt: case CCValAssign::ZExt: - // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt - // nodes after our lowering. - assert(RegVT == Ins[i].VT && "incorrect register location selected"); + break; + case CCValAssign::AExtUpper: + ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue, + DAG.getConstant(32, DL, RegVT)); + ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT()); break; } - - InVals.push_back(ArgValue); - } else { // VA.isRegLoc() assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); @@ -3146,7 +3156,6 @@ // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -3155,6 +3164,7 @@ switch (VA.getLocInfo()) { default: break; + case CCValAssign::Trunc: case CCValAssign::BCvt: MemVT = VA.getLocVT(); break; @@ -3174,8 +3184,11 @@ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); - InVals.push_back(ArgValue); } + if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) + ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), + ArgValue, DAG.getValueType(MVT::i32)); + InVals.push_back(ArgValue); } // varargs @@ -3192,8 +3205,8 @@ // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. - StackOffset = ((StackOffset + 7) & ~7); + // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 + StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); if (MFI.hasMustTailInVarArgFunc()) { @@ -3330,6 +3343,7 @@ : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC); @@ -3347,10 +3361,16 @@ continue; } - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); + // Avoid copying a physreg twice since RegAllocFast is incompetent and only + // allows one use of a physreg per block. + SDValue Val = CopiedRegs.lookup(VA.getLocReg()); + if (!Val) { + Val = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + CopiedRegs[VA.getLocReg()] = Val; + } switch (VA.getLocInfo()) { default: @@ -3360,6 +3380,15 @@ case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; + case CCValAssign::AExtUpper: + Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val, + DAG.getConstant(32, DL, VA.getLocVT())); + LLVM_FALLTHROUGH; + case CCValAssign::AExt: + LLVM_FALLTHROUGH; + case CCValAssign::ZExt: + Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT()); + break; } InVals.push_back(Val); @@ -3662,7 +3691,7 @@ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy(DAG.getDataLayout())); - SmallVector, 8> RegsToPass; + std::map RegsToPass; SmallVector MemOpChains; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3670,7 +3699,7 @@ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); + RegsToPass.insert(std::make_pair(unsigned(F.PReg), Val)); } } @@ -3701,8 +3730,17 @@ } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + Arg = DAG.getBitcast(VA.getLocVT(), Arg); + break; + case CCValAssign::Trunc: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); break; case CCValAssign::FPExt: Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); @@ -3718,7 +3756,11 @@ "unexpected use of 'returned'"); IsThisReturn = true; } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + auto RegVal = RegsToPass.insert(std::make_pair(VA.getLocReg(), Arg)); + if (!RegVal.second) { + SDValue &Bits = RegVal.first->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + } } else { assert(VA.isMemLoc()); @@ -3934,7 +3976,7 @@ // Copy the result values into the output registers. SDValue Flag; - SmallVector RetOps(1, Chain); + std::map RetVals; for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; @@ -3956,11 +3998,31 @@ case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExt: + case CCValAssign::ZExt: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; + } + + auto RetVal = RetVals.insert(std::make_pair(VA.getLocReg(), Arg)); + if (!RetVal.second) { + SDValue &Bits = RetVal.first->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); } + } - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + SmallVector RetOps(1, Chain); + for (auto &RetVal : RetVals) { + Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + RetOps.push_back( + DAG.getRegister(RetVal.first, RetVal.second.getValueType())); } const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = @@ -4138,6 +4200,7 @@ SDLoc DL(Op); MVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout()); const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = @@ -4148,13 +4211,16 @@ // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( - MVT::i64, DL, Chain, DescAddr, + PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 8, + /* Alignment = */ PtrMemVT.getSizeInBits() / 8, MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); + // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer. + FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); @@ -5030,6 +5096,7 @@ SDLoc DL(Op); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy(DAG.getDataLayout())); + FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout())); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); @@ -5136,15 +5203,15 @@ // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = - Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; + unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; + unsigned VaListSize = (Subtarget->isTargetDarwin() || + Subtarget->isTargetWindows()) ? PtrSize : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), - Op.getOperand(2), - DAG.getConstant(VaListSize, DL, MVT::i32), - 8, false, false, false, MachinePointerInfo(DestSV), + return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize, + false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -5158,12 +5225,15 @@ SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8; auto PtrVT = getPointerTy(DAG.getDataLayout()); - - SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V)); + auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); + SDValue VAList = + DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V)); Chain = VAList.getValue(1); + VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT); - if (Align > 8) { + if (Align > MinSlotSize) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); @@ -5172,14 +5242,14 @@ } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); + unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the // vaargs list to match this, and for FP values we need to introduce // FP_ROUND nodes as well. if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; + ArgSize = std::max(ArgSize, MinSlotSize); bool NeedFPTrunc = false; if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { ArgSize = 8; @@ -5189,6 +5259,8 @@ // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(ArgSize, DL, PtrVT)); + VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT); + // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V)); @@ -5218,10 +5290,15 @@ SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = - DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); + + if (Subtarget->isTargetILP32()) + FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr, + DAG.getValueType(VT)); + return FrameAddr; } Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1469,6 +1469,8 @@ return false; MachineBasicBlock &MBB = *MI.getParent(); + auto &Subtarget = MBB.getParent()->getSubtarget(); + auto TRI = Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); if (MI.getOpcode() == AArch64::CATCHRET) { @@ -1504,11 +1506,22 @@ if ((OpFlags & AArch64II::MO_GOT) != 0) { BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) .addGlobalAddress(GV, 0, OpFlags); - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addImm(0) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()); + } } else if (TM.getCodeModel() == CodeModel::Large) { + assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) .addImm(0); @@ -1535,10 +1548,20 @@ BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, LoFlags) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()); + } } MBB.erase(MI); Index: llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -32,7 +32,7 @@ const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -386,6 +386,8 @@ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetILP32() const { return TargetTriple.isArch32Bit(); } + bool useAA() const override { return UseAA; } bool hasVH() const { return HasVH; } @@ -412,6 +414,12 @@ bool hasFMI() const { return HasFMI; } bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } + bool addrSinkUsingGEPs() const override { + // Keeping GEPs inbounds is important for exploiting AArch64 + // addressing-modes in ILP32 mode. + return useAA() || isTargetILP32(); + } + bool useSmallAddressing() const { switch (TLInfo.getTargetMachine().getCodeModel()) { case CodeModel::Kernel: Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -156,6 +156,8 @@ RegisterTargetMachine X(getTheAArch64leTarget()); RegisterTargetMachine Y(getTheAArch64beTarget()); RegisterTargetMachine Z(getTheARM64Target()); + RegisterTargetMachine W(getTheARM64_32Target()); + RegisterTargetMachine V(getTheAArch64_32Target()); auto PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); @@ -198,8 +200,11 @@ bool LittleEndian) { if (Options.getABIName() == "ilp32") return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; - if (TT.isOSBinFormatMachO()) + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::aarch64_32) + return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; return "e-m:o-i64:64-i128:128-n32:64-S128"; + } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) @@ -276,7 +281,8 @@ } // Enable GlobalISel at or below EnableGlobalISelAt0. - if (getOptLevel() <= EnableGlobalISelAtO) { + if (getOptLevel() <= EnableGlobalISelAtO && + TT.getArch() != Triple::aarch64_32) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); } Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h =================================================================== --- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -23,7 +23,7 @@ class Triple; struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit AArch64MCAsmInfoDarwin(); + explicit AArch64MCAsmInfoDarwin(bool IsILP32); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -30,7 +30,7 @@ cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"))); -AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) { // We prefer NEON instructions to be printed in the short, Apple-specific // form when targeting Darwin. AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant; @@ -39,7 +39,8 @@ PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - CodePointerSize = CalleeSaveStackSlotSize = 8; + CalleeSaveStackSlotSize = 8; + CodePointerSize = IsILP32 ? 4 : 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp =================================================================== --- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -73,7 +73,7 @@ const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) - MAI = new AArch64MCAsmInfoDarwin(); + MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new AArch64MCAsmInfoMicrosoftCOFF(); else if (TheTriple.isOSBinFormatCOFF()) Index: llvm/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/lib/Target/X86/X86FastISel.cpp +++ llvm/lib/Target/X86/X86FastISel.cpp @@ -3397,6 +3397,7 @@ case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: case CCValAssign::FPExt: + case CCValAssign::Trunc: llvm_unreachable("Unexpected loc info!"); case CCValAssign::Indirect: // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully Index: llvm/test/CodeGen/AArch64/arm64-aapcs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-aapcs.ll +++ llvm/test/CodeGen/AArch64/arm64-aapcs.ll @@ -25,7 +25,7 @@ @var64 = global i64 0, align 8 ; Check stack slots are 64-bit at all times. -define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, +define void @test_stack_slots([8 x i64], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) { ; CHECK-LABEL: test_stack_slots: ; CHECK-DAG: ldr w[[ext1:[0-9]+]], [sp, #24] Index: llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll +++ llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-watchos -O3 -aarch64-enable-collect-loh | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. ; Index: llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-ios -O2 | FileCheck %s ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related Index: llvm/test/CodeGen/AArch64/arm64-collect-loh.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-collect-loh.ll +++ llvm/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,4 +1,5 @@ ; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64_32-apple-watchos -O2 | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh @@ -60,9 +61,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getC() { @@ -76,9 +77,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsw x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsw x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExtC() { @@ -94,10 +95,10 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] -; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], [x[[LDRGOT_REG]]] ; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 -; CHECK-NEXT: str [[ADD]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str [[ADD]], [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define void @getSeveralC(i32 %t) { @@ -114,9 +115,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define void @setC(i32 %t) { @@ -142,7 +143,7 @@ ; CHECK-NEXT: ret ; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define i32 @getInternalCPlus4() { - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 %res = load i32, i32* %addr, align 4 ret i32 %res } @@ -159,7 +160,7 @@ ; CHECK-NEXT: ret ; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExtInternalCPlus4() { - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 %res = load i32, i32* %addr, align 4 %sextres = sext i32 %res to i64 ret i64 %sextres @@ -180,7 +181,7 @@ ; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]] define void @getSeveralInternalCPlus4(i32 %t) { entry: - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 %tmp = load i32, i32* %addr, align 4 %add = add nsw i32 %tmp, %t store i32 %add, i32* %addr, align 4 @@ -200,7 +201,7 @@ ; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] define void @setInternalCPlus4(i32 %t) { entry: - %addr = getelementptr i32, i32* @InternalC, i32 4 + %addr = getelementptr inbounds i32, i32* @InternalC, i32 4 store i32 %t, i32* %addr, align 4 ret void } @@ -276,8 +277,8 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] -; CHECK-NEXT: ldrb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldrb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define i8 @getD() { @@ -289,9 +290,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: strb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: strb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setD(i8 %t) { @@ -305,9 +306,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsb w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getSExtD() { @@ -322,9 +323,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsb x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsb x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExt64D() { @@ -341,8 +342,8 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] -; CHECK-NEXT: ldrh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldrh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define i16 @getE() { @@ -356,9 +357,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i32 @getSExtE() { @@ -371,9 +372,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: strh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: strh w0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setE(i16 %t) { @@ -387,9 +388,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldrsh x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldrsh x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getSExt64E() { @@ -406,9 +407,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define i64 @getF() { @@ -420,9 +421,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str x0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setF(i64 %t) { @@ -438,9 +439,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr s0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define float @getG() { @@ -452,9 +453,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str s0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setG(float %t) { @@ -470,9 +471,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr h0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define half @getH() { @@ -484,9 +485,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str h0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setH(half %t) { @@ -502,9 +503,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define double @getI() { @@ -516,9 +517,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setI(double %t) { @@ -534,9 +535,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <2 x i32> @getJ() { @@ -548,9 +549,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setJ(<2 x i32> %t) { @@ -566,9 +567,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr q0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <4 x i32> @getK() { @@ -580,9 +581,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] ; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: str q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: str q0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] define void @setK(<4 x i32> %t) { @@ -598,9 +599,9 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] ; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr b0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ldr b0, [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] define <1 x i8> @getL() { @@ -612,11 +613,11 @@ ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: -; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] ; CHECK-NEXT: ; kill ; Ultimately we should generate str b0, but right now, we match the vector ; variant which does not allow to fold the immediate into the store. -; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: st1.b { v0 }[0], [x[[LDRGOT_REG]]] ; CHECK-NEXT: ret ; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] define void @setL(<1 x i8> %t) { Index: llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s +; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) { ; CHECK-LABEL: store64: Index: llvm/test/CodeGen/AArch64/arm64-stacksave.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-stacksave.ll +++ llvm/test/CodeGen/AArch64/arm64-stacksave.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -verify-coalescing +; RUN: llc -mtriple=arm64-apple-macosx10.8.0 < %s -verify-coalescing +; RUN: llc -mtriple=arm64_32-apple-ios9.0 < %s -verify-coalescing ; -target triple = "arm64-apple-macosx10.8.0" ; Verify that we can handle spilling the stack pointer without attempting ; spilling it directly. Index: llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -18,15 +18,14 @@ define void @t1() nounwind ssp { entry: ; ALL-LABEL: t1: -; ALL-NOT: fmov ; NONEFP: ldr h0,{{.*}} -; NONEFP: fmov s1, wzr -; NONEFP: fmov d2, xzr -; NONEFP: movi{{(.16b)?}} v3{{(.2d)?}}, #0 -; NONE16: fmov h0, wzr -; NONE16: fmov s1, wzr -; NONE16: fmov d2, xzr -; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0 +; NONEFP-DAG: fmov s1, wzr +; NONEFP-DAG: fmov d2, xzr +; NONEFP-DAG: movi{{(.16b)?}} v3{{(.2d)?}}, #0 +; NONE16-DAG: fmov h0, wzr +; NONE16-DAG: fmov s1, wzr +; NONE16-DAG: fmov d2, xzr +; NONE16-DAG: movi{{(.16b)?}} v3{{(.2d)?}}, #0 ; ZEROFP: ldr h0,{{.*}} ; ZEROFP: movi v{{[0-3]+}}.2d, #0 ; ZEROFP: movi v{{[0-3]+}}.2d, #0 Index: llvm/test/CodeGen/AArch64/arm64_32-addrs.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +; If %base < 96 then the sum will not wrap (in an unsigned sense), but "ldr w0, +; [x0, #-96]" would. +define i32 @test_valid_wrap(i32 %base) { +; CHECK-LABEL: test_valid_wrap: +; CHECK: sub w[[ADDR:[0-9]+]], w0, #96 +; CHECK: ldr w0, [x[[ADDR]]] + + %newaddr = add nuw i32 %base, -96 + %ptr = inttoptr i32 %newaddr to i32* + %val = load i32, i32* %ptr + ret i32 %val +} + +define i8 @test_valid_wrap_optimizable(i8* %base) { +; CHECK-LABEL: test_valid_wrap_optimizable: +; CHECK: ldurb w0, [x0, #-96] + + %newaddr = getelementptr inbounds i8, i8* %base, i32 -96 + %val = load i8, i8* %newaddr + ret i8 %val +} + +define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) { +; CHECK-LABEL: test_valid_wrap_optimizable1: +; CHECK: ldrb w0, [x0, w1, sxtw] + + %newaddr = getelementptr inbounds i8, i8* %base, i32 %offset + %val = load i8, i8* %newaddr + ret i8 %val +} + +; +define i8 @test_valid_wrap_optimizable2(i8* %base, i32 %offset) { +; CHECK-LABEL: test_valid_wrap_optimizable2: +; CHECK: sxtw x[[OFFSET:[0-9]+]], w1 +; CHECK: mov w[[BASE:[0-9]+]], #-100 +; CHECK: ldrb w0, [x[[OFFSET]], x[[BASE]]] + + %newaddr = getelementptr inbounds i8, i8* inttoptr(i32 -100 to i8*), i32 %offset + %val = load i8, i8* %newaddr + ret i8 %val +} Index: llvm/test/CodeGen/AArch64/arm64_32-atomics.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-atomics.ll @@ -0,0 +1,261 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 -o - %s | FileCheck %s + +define i8 @test_load_8(i8* %addr) { +; CHECK-LABAL: test_load_8: +; CHECK: ldarb w0, [x0] + %val = load atomic i8, i8* %addr seq_cst, align 1 + ret i8 %val +} + +define i16 @test_load_16(i16* %addr) { +; CHECK-LABAL: test_load_16: +; CHECK: ldarh w0, [x0] + %val = load atomic i16, i16* %addr acquire, align 2 + ret i16 %val +} + +define i32 @test_load_32(i32* %addr) { +; CHECK-LABAL: test_load_32: +; CHECK: ldar w0, [x0] + %val = load atomic i32, i32* %addr seq_cst, align 4 + ret i32 %val +} + +define i64 @test_load_64(i64* %addr) { +; CHECK-LABAL: test_load_64: +; CHECK: ldar x0, [x0] + %val = load atomic i64, i64* %addr seq_cst, align 8 + ret i64 %val +} + +define i8* @test_load_ptr(i8** %addr) { +; CHECK-LABAL: test_load_ptr: +; CHECK: ldar w0, [x0] + %val = load atomic i8*, i8** %addr seq_cst, align 8 + ret i8* %val +} + +define void @test_store_8(i8* %addr) { +; CHECK-LABAL: test_store_8: +; CHECK: stlrb wzr, [x0] + store atomic i8 0, i8* %addr seq_cst, align 1 + ret void +} + +define void @test_store_16(i16* %addr) { +; CHECK-LABAL: test_store_16: +; CHECK: stlrh wzr, [x0] + store atomic i16 0, i16* %addr seq_cst, align 2 + ret void +} + +define void @test_store_32(i32* %addr) { +; CHECK-LABAL: test_store_32: +; CHECK: stlr wzr, [x0] + store atomic i32 0, i32* %addr seq_cst, align 4 + ret void +} + +define void @test_store_64(i64* %addr) { +; CHECK-LABAL: test_store_64: +; CHECK: stlr xzr, [x0] + store atomic i64 0, i64* %addr seq_cst, align 8 + ret void +} + +define void @test_store_ptr(i8** %addr) { +; CHECK-LABAL: test_store_ptr: +; CHECK: stlr wzr, [x0] + store atomic i8* null, i8** %addr seq_cst, align 8 + ret void +} + +declare i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) +declare i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) +declare i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) +declare i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) + +define i8 @test_ldxr_8(i8* %addr) { +; CHECK-LABEL: test_ldxr_8: +; CHECK: ldxrb w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr) + %val8 = trunc i64 %val to i8 + ret i8 %val8 +} + +define i16 @test_ldxr_16(i16* %addr) { +; CHECK-LABEL: test_ldxr_16: +; CHECK: ldxrh w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) + %val16 = trunc i64 %val to i16 + ret i16 %val16 +} + +define i32 @test_ldxr_32(i32* %addr) { +; CHECK-LABEL: test_ldxr_32: +; CHECK: ldxr w0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) + %val32 = trunc i64 %val to i32 + ret i32 %val32 +} + +define i64 @test_ldxr_64(i64* %addr) { +; CHECK-LABEL: test_ldxr_64: +; CHECK: ldxr x0, [x0] + + %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) + ret i64 %val +} + +declare i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) +declare i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) + +define i8 @test_ldaxr_8(i8* %addr) { +; CHECK-LABEL: test_ldaxr_8: +; CHECK: ldaxrb w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) + %val8 = trunc i64 %val to i8 + ret i8 %val8 +} + +define i16 @test_ldaxr_16(i16* %addr) { +; CHECK-LABEL: test_ldaxr_16: +; CHECK: ldaxrh w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) + %val16 = trunc i64 %val to i16 + ret i16 %val16 +} + +define i32 @test_ldaxr_32(i32* %addr) { +; CHECK-LABEL: test_ldaxr_32: +; CHECK: ldaxr w0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) + %val32 = trunc i64 %val to i32 + ret i32 %val32 +} + +define i64 @test_ldaxr_64(i64* %addr) { +; CHECK-LABEL: test_ldaxr_64: +; CHECK: ldaxr x0, [x0] + + %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) + ret i64 %val +} + +declare i32 @llvm.aarch64.stxr.p0i8(i64, i8*) +declare i32 @llvm.aarch64.stxr.p0i16(i64, i16*) +declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*) +declare i32 @llvm.aarch64.stxr.p0i64(i64, i64*) + +define i32 @test_stxr_8(i8* %addr, i8 %val) { +; CHECK-LABEL: test_stxr_8: +; CHECK: stxrb [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i8 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i8(i64 %extval, i8* %addr) + ret i32 %success +} + +define i32 @test_stxr_16(i16* %addr, i16 %val) { +; CHECK-LABEL: test_stxr_16: +; CHECK: stxrh [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i16 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i16(i64 %extval, i16* %addr) + ret i32 %success +} + +define i32 @test_stxr_32(i32* %addr, i32 %val) { +; CHECK-LABEL: test_stxr_32: +; CHECK: stxr [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i32 %val to i64 + %success = call i32 @llvm.aarch64.stxr.p0i32(i64 %extval, i32* %addr) + ret i32 %success +} + +define i32 @test_stxr_64(i64* %addr, i64 %val) { +; CHECK-LABEL: test_stxr_64: +; CHECK: stxr [[TMP:w[0-9]+]], x1, [x0] +; CHECK: mov w0, [[TMP]] + + %success = call i32 @llvm.aarch64.stxr.p0i64(i64 %val, i64* %addr) + ret i32 %success +} + +declare i32 @llvm.aarch64.stlxr.p0i8(i64, i8*) +declare i32 @llvm.aarch64.stlxr.p0i16(i64, i16*) +declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*) +declare i32 @llvm.aarch64.stlxr.p0i64(i64, i64*) + +define i32 @test_stlxr_8(i8* %addr, i8 %val) { +; CHECK-LABEL: test_stlxr_8: +; CHECK: stlxrb [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i8 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr) + ret i32 %success +} + +define i32 @test_stlxr_16(i16* %addr, i16 %val) { +; CHECK-LABEL: test_stlxr_16: +; CHECK: stlxrh [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i16 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr) + ret i32 %success +} + +define i32 @test_stlxr_32(i32* %addr, i32 %val) { +; CHECK-LABEL: test_stlxr_32: +; CHECK: stlxr [[TMP:w[0-9]+]], w1, [x0] +; CHECK: mov w0, [[TMP]] + + %extval = zext i32 %val to i64 + %success = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr) + ret i32 %success +} + +define i32 @test_stlxr_64(i64* %addr, i64 %val) { +; CHECK-LABEL: test_stlxr_64: +; CHECK: stlxr [[TMP:w[0-9]+]], x1, [x0] +; CHECK: mov w0, [[TMP]] + + %success = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr) + ret i32 %success +} + +define {i8*, i1} @test_cmpxchg_ptr(i8** %addr, i8* %cmp, i8* %new) { +; CHECK-LABEL: test_cmpxchg_ptr: +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] +; CHECK: cmp [[OLD]], w1 +; CHECK: b.ne [[DONE:LBB[0-9]+_[0-9]+]] +; CHECK: stlxr [[SUCCESS:w[0-9]+]], w2, [x0] +; CHECK: cbnz [[SUCCESS]], [[LOOP]] + +; CHECK: mov w1, #1 +; CHECK: mov w0, [[OLD]] +; CHECK: ret + +; CHECK: [[DONE]]: +; CHECK: clrex +; CHECK: mov w1, wzr +; CHECK: mov w0, [[OLD]] +; CHECK: ret + %res = cmpxchg i8** %addr, i8* %cmp, i8* %new acq_rel acquire + ret {i8*, i1} %res +} Index: llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm64_32-apple-ios -O0 -fast-isel %s -o - | FileCheck %s +@var = global i8* null + +define void @test_store_release_ptr() { +; CHECK-LABEL: test_store_release_ptr +; CHECK: mov [[ZERO:w[0-9]+]], wzr +; CHECK: stlr [[ZERO]] + store atomic i8* null, i8** @var release, align 4 + br label %next + +next: + ret void +} + +declare [2 x i32] @callee() + +define void @test_struct_return(i32* %addr) { +; CHECK-LABEL: test_struct_return: +; CHECK: bl _callee +; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32 +; CHECK-DAG: mov [[LO:w[0-9]+]], w0 + %res = call [2 x i32] @callee() + %res.0 = extractvalue [2 x i32] %res, 0 + store i32 %res.0, i32* %addr + %res.1 = extractvalue [2 x i32] %res, 1 + store i32 %res.1, i32* %addr + ret void +} Index: llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-frame-pointers.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=arm64_32-apple-ios8.0 %s -o - | FileCheck %s + +; We're provoking LocalStackSlotAllocation to create some shared frame bases +; here: it wants multiple using instructions that can be satisfied by a +; single base, but not within the addressing-mode. +; +; When that happens it's important that we don't mix our pointer sizes +; (e.g. try to create an ldr from a w-register base). +define i8 @test_register_wrangling() { +; CHECK-LABEL: test_register_wrangling: +; CHECK: add [[TMP:x[0-9]+]], sp, +; CHECK: add x[[BASE:[0-9]+]], [[TMP]], +; CHECK: ldrb {{w[0-9]+}}, [x[[BASE]], #1] +; CHECK: ldrb {{w[0-9]+}}, [x[[BASE]]] + + %var1 = alloca i8, i32 4100 + %var3 = alloca i8 + %dummy = alloca i8, i32 4100 + + %var1p1 = getelementptr i8, i8* %var1, i32 1 + %val1 = load i8, i8* %var1 + %val2 = load i8, i8* %var3 + + %sum = add i8 %val1, %val2 + ret i8 %sum +} Index: llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll @@ -0,0 +1,61 @@ +; RUN: opt -codegenprepare -mtriple=arm64_32-apple-ios %s -S -o - | FileCheck %s + +define void @test_simple_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_simple_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %addr = getelementptr i1, i1* %base, i64 %offset + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} + +define void @test_inbounds_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_inbounds_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr inbounds i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %addr = getelementptr inbounds i1, i1* %base, i64 %offset + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} + +; No address derived via an add can be guaranteed inbounds +define void @test_add_sink(i1* %base, i64 %offset) { +; CHECK-LABEL: @test_add_sink +; CHECK: next: +; CHECK: [[BASE8:%.*]] = bitcast i1* %base to i8* +; CHECK: [[ADDR8:%.*]] = getelementptr i8, i8* [[BASE8]], i64 %offset +; CHECK: [[ADDR:%.*]] = bitcast i8* [[ADDR8]] to i1* +; CHECK: load volatile i1, i1* [[ADDR]] + %base64 = ptrtoint i1* %base to i64 + %addr64 = add nsw nuw i64 %base64, %offset + %addr = inttoptr i64 %addr64 to i1* + %tst = load i1, i1* %addr + br i1 %tst, label %next, label %end + +next: + load volatile i1, i1* %addr + ret void + +end: + ret void +} Index: llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-memcpy.ll @@ -0,0 +1,66 @@ +; RUN: llc -mtriple=arm64_32-apple-ios9.0 -o - %s | FileCheck %s + +define i64 @test_memcpy(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memcpy: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memcpy + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %val.ptr, i8* %src, i32 128, i32 0, i1 1) + ret i64 undef +} + +define i64 @test_memmove(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memmove: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memmove + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memmove.p0i8.p0i8.i32(i8* %val.ptr, i8* %src, i32 128, i32 0, i1 1) + ret i64 undef +} + +define i64 @test_memset(i64* %addr, i8* %src, i1 %tst) minsize { +; CHECK-LABEL: test_memset: +; CHECK: ldr [[VAL64:x[0-9]+]], [x0] +; [...] +; CHECK: and x0, [[VAL64]], #0xffffffff +; CHECK: bl _memset + + %val64 = load i64, i64* %addr + br i1 %tst, label %true, label %false + +true: + ret i64 %val64 + +false: + %val32 = trunc i64 %val64 to i32 + %val.ptr = inttoptr i32 %val32 to i8* + call void @llvm.memset.p0i8.i32(i8* %val.ptr, i8 42, i32 256, i32 0, i1 1) + ret i64 undef +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + Index: llvm/test/CodeGen/AArch64/arm64_32-neon.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-neon.ll @@ -0,0 +1,198 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s + +define <2 x double> @test_insert_elt(<2 x double> %vec, double %val) { +; CHECK-LABEL: test_insert_elt: +; CHECK: mov.d v0[0], v1[0] + %res = insertelement <2 x double> %vec, double %val, i32 0 + ret <2 x double> %res +} + +define void @test_split_16B(<4 x float> %val, <4 x float>* %addr) { +; CHECK-LABEL: test_split_16B: +; CHECK: str q0, [x0] + store <4 x float> %val, <4 x float>* %addr, align 8 + ret void +} + +define void @test_split_16B_splat(<4 x i32>, <4 x i32>* %addr) { +; CHECK-LABEL: test_split_16B_splat: +; CHECK: str {{q[0-9]+}} + + %vec.tmp0 = insertelement <4 x i32> undef, i32 42, i32 0 + %vec.tmp1 = insertelement <4 x i32> %vec.tmp0, i32 42, i32 1 + %vec.tmp2 = insertelement <4 x i32> %vec.tmp1, i32 42, i32 2 + %vec = insertelement <4 x i32> %vec.tmp2, i32 42, i32 3 + + store <4 x i32> %vec, <4 x i32>* %addr, align 8 + ret void +} + + +%vec = type <2 x double> + +declare {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8*) +define {%vec, %vec} @test_neon_load(i8* %addr) { +; CHECK-LABEL: test_neon_load: +; CHECK: ld2r.2d { v0, v1 }, [x0] + %res = call {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8* %addr) + ret {%vec, %vec} %res +} + +declare {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec, %vec, i64, i8*) +define {%vec, %vec} @test_neon_load_lane(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_load_lane: +; CHECK: ld2.d { v0, v1 }[0], [x0] + %res = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr) + ret {%vec, %vec} %res +} + +declare void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec, %vec, i8*) +define void @test_neon_store(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store: +; CHECK: st2.2d { v0, v1 }, [x0] + call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr) + ret void +} + +declare void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec, %vec, i64, i8*) +define void @test_neon_store_lane(i8* %addr, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_lane: +; CHECK: st2.d { v0, v1 }[1], [x0] + call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr) + ret void +} + +declare {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8*) +define {{%vec, %vec}, i8*} @test_neon_load_post(i8* %addr, i32 %offset) { +; CHECK-LABEL: test_neon_load_post: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]] + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define {{%vec, %vec}, i8*} @test_neon_load_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_load_post_lane: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]] + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define i8* @test_neon_store_post(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_post: +; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]] + + call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + ret i8* %addr.new +} + +define i8* @test_neon_store_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) { +; CHECK-LABEL: test_neon_store_post_lane: +; CHECK: sxtw [[OFFSET:x[0-9]+]], w1 +; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]] + + call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset + + ret i8* %addr.new +} + +; ld1 is slightly different because it goes via ISelLowering of normal IR ops +; rather than an intrinsic. +define {%vec, double*} @test_neon_ld1_post_lane(double* %addr, i32 %offset, %vec %in) { +; CHECK-LABEL: test_neon_ld1_post_lane: +; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32 +; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]] + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr inbounds double, double* %addr, i32 %offset + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +define {{%vec, %vec}, i8*} @test_neon_load_post_exact(i8* %addr) { +; CHECK-LABEL: test_neon_load_post_exact: +; CHECK: ld2.2d { v0, v1 }, [x0], #32 + + %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr) + + %addr.new = getelementptr inbounds i8, i8* %addr, i32 32 + + %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0 + %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1 + ret {{%vec, %vec}, i8*} %res +} + +define {%vec, double*} @test_neon_ld1_post_lane_exact(double* %addr, %vec %in) { +; CHECK-LABEL: test_neon_ld1_post_lane_exact: +; CHECK: ld1.d { v0 }[0], [x0], #8 + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr inbounds double, double* %addr, i32 1 + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +; As in the general load/store case, this GEP has defined semantics when the +; address wraps. We cannot use post-indexed addressing. +define {%vec, double*} @test_neon_ld1_notpost_lane_exact(double* %addr, %vec %in) { +; CHECK-LABEL: test_neon_ld1_notpost_lane_exact: +; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8 +; CHECK: add w0, w0, #8 +; CHECK: ret + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr double, double* %addr, i32 1 + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} + +define {%vec, double*} @test_neon_ld1_notpost_lane(double* %addr, i32 %offset, %vec %in) { +; CHECK-LABEL: test_neon_ld1_notpost_lane: +; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}} +; CHECK: add w0, w0, w1, lsl #3 +; CHECK: ret + + %loaded = load double, double* %addr, align 8 + %newvec = insertelement %vec %in, double %loaded, i32 0 + + %addr.new = getelementptr double, double* %addr, i32 %offset + + %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0 + %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1 + + ret {%vec, double*} %res +} Index: llvm/test/CodeGen/AArch64/arm64_32-null.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-null.ll @@ -0,0 +1,29 @@ +; RUN: llc -fast-isel=true -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s +; RUN: llc -fast-isel=false -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define void @test_store(i8** %p) { +; CHECK-LABEL: test_store: +; CHECK: mov [[R1:w[0-9]+]], wzr +; CHECK: str [[R1]], [x0] + + store i8* null, i8** %p + ret void +} + +define void @test_phi(i8** %p) { +; CHECK-LABEL: test_phi: +; CHECK: mov [[R1:x[0-9]+]], xzr +; CHECK: str [[R1]], [sp] +; CHECK: b [[BB:LBB[0-9_]+]] +; CHECK: [[BB]]: +; CHECK: ldr x0, [sp] +; CHECK: mov [[R2:w[0-9]+]], w0 +; CHECK: str [[R2]], [x{{.*}}] + +bb0: + br label %bb1 +bb1: + %tmp0 = phi i8* [ null, %bb0 ] + store i8* %tmp0, i8** %p + ret void +} Index: llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-pointer-extend.ll @@ -0,0 +1,49 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - | FileCheck %s + +define void @pass_pointer(i64 %in) { +; CHECK-LABEL: pass_pointer: +; CHECK: and x0, x0, #0xffffffff +; CHECK: bl _take_pointer + + %in32 = trunc i64 %in to i32 + %ptr = inttoptr i32 %in32 to i8* + call i64 @take_pointer(i8* %ptr) + ret void +} + +define i64 @take_pointer(i8* %ptr) nounwind { +; CHECK-LABEL: take_pointer: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: ret + + %val = ptrtoint i8* %ptr to i32 + %res = zext i32 %val to i64 + ret i64 %res +} + +define i32 @callee_ptr_stack_slot([8 x i64], i8*, i32 %val) { +; CHECK-LABEL: callee_ptr_stack_slot: +; CHECK: ldr w0, [sp, #4] + + ret i32 %val +} + +define void @caller_ptr_stack_slot(i8* %ptr) { +; CHECK-LABEL: caller_ptr_stack_slot: +; CHECK-DAG: mov [[VAL:w[0-9]]], #42 +; CHECK: stp w0, [[VAL]], [sp] + + call i32 @callee_ptr_stack_slot([8 x i64] undef, i8* %ptr, i32 42) + ret void +} + +define i8* @return_ptr(i64 %in, i64 %r) { +; CHECK-LABEL: return_ptr: +; CHECK: sdiv [[VAL64:x[0-9]+]], x0, x1 +; CHECK: and x0, [[VAL64]], #0xffffffff + + %sum = sdiv i64 %in, %r + %sum32 = trunc i64 %sum to i32 + %res = inttoptr i32 %sum32 to i8* + ret i8* %res +} Index: llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-stack-pointers.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=arm64_32-apple-ios9.0 -o - %s | FileCheck %s + +declare void @callee([8 x i64], i8*, i8*) + +; Make sure we don't accidentally store X0 or XZR, which might well +; clobber other arguments or data. +define void @test_stack_ptr_32bits(i8* %in) { +; CHECK-LABEL: test_stack_ptr_32bits: +; CHECK-DAG: stp wzr, w0, [sp] + + call void @callee([8 x i64] undef, i8* null, i8* %in) + ret void +} Index: llvm/test/CodeGen/AArch64/arm64_32-tls.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-tls.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define i32 @test_thread_local() { +; CHECK-LABEL: test_thread_local: +; CHECK: adrp x[[TMP:[0-9]+]], _var@TLVPPAGE +; CHECK: ldr w0, [x[[TMP]], _var@TLVPPAGEOFF] +; CHECK: ldr w[[DEST:[0-9]+]], [x0] +; CHECK: blr x[[DEST]] + + %val = load i32, i32* @var + ret i32 %val +} + +@var = thread_local global i32 zeroinitializer + +; CHECK: .tbss _var$tlv$init, 4, 2 + +; CHECK-LABEL: __DATA,__thread_vars +; CHECK: _var: +; CHECK: .long __tlv_bootstrap +; CHECK: .long 0 +; CHECK: .long _var$tlv$init Index: llvm/test/CodeGen/AArch64/arm64_32-va.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32-va.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s + +define void @test_va_copy(i8* %dst, i8* %src) { +; CHECK-LABEL: test_va_copy: +; CHECK: ldr [[PTR:w[0-9]+]], [x1] +; CHECK: str [[PTR]], [x0] + + call void @llvm.va_copy(i8* %dst, i8* %src) + ret void +} + +define void @test_va_start(i32, ...) { +; CHECK-LABEL: test_va_start +; CHECK: add x[[LIST:[0-9]+]], sp, #16 +; CHECK: str w[[LIST]], + %slot = alloca i8*, align 4 + %list = bitcast i8** %slot to i8* + call void @llvm.va_start(i8* %list) + ret void +} + +define void @test_va_start_odd([8 x i64], i32, ...) { +; CHECK-LABEL: test_va_start_odd: +; CHECK: add x[[LIST:[0-9]+]], sp, #20 +; CHECK: str w[[LIST]], + %slot = alloca i8*, align 4 + %list = bitcast i8** %slot to i8* + call void @llvm.va_start(i8* %list) + ret void +} + +define i8* @test_va_arg(i8** %list) { +; CHECK-LABEL: test_va_arg: +; CHECK: ldr w[[LOC:[0-9]+]], [x0] +; CHECK: add [[NEXTLOC:w[0-9]+]], w[[LOC]], #4 +; CHECK: str [[NEXTLOC]], [x0] +; CHECK: ldr w0, [x[[LOC]]] + %res = va_arg i8** %list, i8* + ret i8* %res +} + +define i8* @really_test_va_arg(i8** %list, i1 %tst) { +; CHECK-LABEL: really_test_va_arg: +; CHECK: ldr w[[LOC:[0-9]+]], [x0] +; CHECK: add [[NEXTLOC:w[0-9]+]], w[[LOC]], #4 +; CHECK: str [[NEXTLOC]], [x0] +; CHECK: ldr w[[VAARG:[0-9]+]], [x[[LOC]]] +; CHECK: csel x0, x[[VAARG]], xzr + %tmp = va_arg i8** %list, i8* + %res = select i1 %tst, i8* %tmp, i8* null + ret i8* %res +} + +declare void @llvm.va_start(i8*) + +declare void @llvm.va_copy(i8*, i8*) Index: llvm/test/CodeGen/AArch64/arm64_32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/arm64_32.ll @@ -0,0 +1,715 @@ +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=all | \ +; RUN: llvm-objdump -private-headers - | \ +; RUN: FileCheck %s --check-prefix=CHECK-MACHO +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -fast-isel -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST + +; CHECK-MACHO: Mach header +; CHECK-MACHO: MH_MAGIC ARM64_32 V8 + +@var64 = global i64 zeroinitializer, align 8 +@var32 = global i32 zeroinitializer, align 4 + +@var_got = external global i8 + +define i32* @test_global_addr() { +; CHECK-LABEL: test_global_addr: +; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE +; CHECK: add x0, [[PAGE]], _var32@PAGEOFF + ret i32* @var32 +} + +; ADRP is necessarily 64-bit. The important point to check is that, however that +; gets truncated to 32-bits, it's free. No need to zero out higher bits of that +; register. +define i64 @test_global_addr_extension() { +; CHECK-LABEL: test_global_addr_extension: +; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE +; CHECK: add x0, [[PAGE]], _var32@PAGEOFF +; CHECK-NOT: and +; CHECK: ret + + ret i64 ptrtoint(i32* @var32 to i64) +} + +define i32 @test_global_value() { +; CHECK-LABEL: test_global_value: +; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE +; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF] + %val = load i32, i32* @var32, align 4 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. +define i32 @test_unsafe_indexed_add() { +; CHECK-LABEL: test_unsafe_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_32 = add i32 %addr_int, 32 + %addr = inttoptr i32 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +; Since we've promised there is no unsigned overflow, @var32 must be at least +; 32-bytes below 2^32, and we can use the load this time. +define i32 @test_safe_indexed_add() { +; CHECK-LABEL: test_safe_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i64 + %addr_plus_32 = add nuw i64 %addr_int, 32 + %addr = inttoptr i64 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +define i32 @test_safe_indexed_or(i32 %in) { +; CHECK-LABEL: test_safe_indexed_or: +; CHECK: and [[TMP:w[0-9]+]], {{w[0-9]+}}, #0xfffffff0 +; CHECK: orr w[[ADDR:[0-9]+]], [[TMP]], #0x4 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = and i32 %in, -16 + %addr_plus_4 = or i32 %addr_int, 4 + %addr = inttoptr i32 %addr_plus_4 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + + +; Promising nsw is not sufficient because the addressing mode basically +; calculates "zext(base) + zext(offset)" and nsw only guarantees +; "sext(base) + sext(offset) == base + offset". +define i32 @test_unsafe_nsw_indexed_add() { +; CHECK-LABEL: test_unsafe_nsw_indexed_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 +; CHECK-NOT: ubfx +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_32 = add nsw i32 %addr_int, 32 + %addr = inttoptr i32 %addr_plus_32 to i32* + %val = load i32, i32* %addr, align 4 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. +define i32 @test_unsafe_unscaled_add() { +; CHECK-LABEL: test_unsafe_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Since we've promised there is no unsigned overflow, @var32 must be at least +; 32-bytes below 2^32, and we can use the load this time. +define i32 @test_safe_unscaled_add() { +; CHECK-LABEL: test_safe_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add nuw i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Promising nsw is not sufficient because the addressing mode basically +; calculates "zext(base) + zext(offset)" and nsw only guarantees +; "sext(base) + sext(offset) == base + offset". +define i32 @test_unsafe_nsw_unscaled_add() { +; CHECK-LABEL: test_unsafe_nsw_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK-NOT: ubfx +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_plus_3 = add nsw i32 %addr_int, 3 + %addr = inttoptr i32 %addr_plus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +; Because the addition may wrap, it is not safe to use "ldur w0, [xN, #-3]" +; here. +define i32 @test_unsafe_negative_unscaled_add() { +; CHECK-LABEL: test_unsafe_negative_unscaled_add: +; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF +; CHECK: sub w[[ADDR:[0-9]+]], w[[VAR32]], #3 +; CHECK: ldr w0, [x[[ADDR]]] + %addr_int = ptrtoint i32* @var32 to i32 + %addr_minus_3 = add i32 %addr_int, -3 + %addr = inttoptr i32 %addr_minus_3 to i32* + %val = load i32, i32* %addr, align 1 + ret i32 %val +} + +define i8* @test_got_addr() { +; CHECK-LABEL: test_got_addr: +; CHECK: adrp x[[PAGE:[0-9]+]], _var_got@GOTPAGE +; CHECK: ldr w0, [x[[PAGE]], _var_got@GOTPAGEOFF] + ret i8* @var_got +} + +define float @test_va_arg_f32(i8** %list) { +; CHECK-LABEL: test_va_arg_f32: + +; CHECK: ldr w[[START:[0-9]+]], [x0] +; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #8 +; CHECK: str [[AFTER]], [x0] + + ; Floating point arguments get promoted to double as per C99. +; CHECK: ldr [[DBL:d[0-9]+]], [x[[START]]] +; CHECK: fcvt s0, [[DBL]] + %res = va_arg i8** %list, float + ret float %res +} + +; Interesting point is that the slot is 4 bytes. +define i8 @test_va_arg_i8(i8** %list) { +; CHECK-LABEL: test_va_arg_i8: + +; CHECK: ldr w[[START:[0-9]+]], [x0] +; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #4 +; CHECK: str [[AFTER]], [x0] + + ; i8 gets promoted to int (again, as per C99). +; CHECK: ldr w0, [x[[START]]] + + %res = va_arg i8** %list, i8 + ret i8 %res +} + +; Interesting point is that the slot needs aligning (again, min size is 4 +; bytes). +define i64 @test_va_arg_i64(i64** %list) { +; CHECK-LABEL: test_va_arg_i64: + + ; Update the list for the next user (minimum slot size is 4, but the actual + ; argument is 8 which had better be reflected!) +; CHECK: ldr w[[UNALIGNED_START:[0-9]+]], [x0] +; CHECK: add [[ALIGN_TMP:x[0-9]+]], x[[UNALIGNED_START]], #7 +; CHECK: and x[[START:[0-9]+]], [[ALIGN_TMP]], #0x1fffffff8 +; CHECK: add w[[AFTER:[0-9]+]], w[[START]], #8 +; CHECK: str w[[AFTER]], [x0] + +; CHECK: ldr x0, [x[[START]]] + + %res = va_arg i64** %list, i64 + ret i64 %res +} + +declare void @bar(...) +define void @test_va_call(i8 %l, i8 %r, float %in, i8* %ptr) { +; CHECK-LABEL: test_va_call: +; CHECK: add [[SUM:w[0-9]+]], {{w[0-9]+}}, w1 + +; CHECK-DAG: str w2, [sp, #32] +; CHECK-DAG: str xzr, [sp, #24] +; CHECK-DAG: str s0, [sp, #16] +; CHECK-DAG: str xzr, [sp, #8] +; CHECK-DAG: str [[SUM]], [sp] + + ; Add them to ensure real promotion occurs. + %sum = add i8 %l, %r + call void(...) @bar(i8 %sum, i64 0, float %in, double 0.0, i8* %ptr) + ret void +} + +declare i8* @llvm.frameaddress(i32) + +define i8* @test_frameaddr() { +; CHECK-LABEL: test_frameaddr: +; CHECK: ldr {{w0|x0}}, [x29] + %val = call i8* @llvm.frameaddress(i32 1) + ret i8* %val +} + +declare i8* @llvm.returnaddress(i32) + +define i8* @test_toplevel_returnaddr() { +; CHECK-LABEL: test_toplevel_returnaddr: +; CHECK: mov x0, x30 + %val = call i8* @llvm.returnaddress(i32 0) + ret i8* %val +} + +define i8* @test_deep_returnaddr() { +; CHECK-LABEL: test_deep_returnaddr: +; CHECK: ldr x[[FRAME_REC:[0-9]+]], [x29] +; CHECK: ldr x0, [x[[FRAME_REC]], #8] + %val = call i8* @llvm.returnaddress(i32 1) + ret i8* %val +} + +define void @test_indirect_call(void()* %func) { +; CHECK-LABEL: test_indirect_call: +; CHECK: blr x0 + call void() %func() + ret void +} + +; Safe to use the unextended address here +define void @test_indirect_safe_call(i32* %weird_funcs) { +; CHECK-LABEL: test_indirect_safe_call: +; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 +; CHECK-OPT-NOT: ubfx +; CHECK: blr x[[ADDR32]] + %addr = getelementptr i32, i32* %weird_funcs, i32 1 + %func = bitcast i32* %addr to void()* + call void() %func() + ret void +} + +declare void @simple() +define void @test_simple_tail_call() { +; CHECK-LABEL: test_simple_tail_call: +; CHECK: b _simple + tail call void @simple() + ret void +} + +define void @test_indirect_tail_call(void()* %func) { +; CHECK-LABEL: test_indirect_tail_call: +; CHECK: br x0 + tail call void() %func() + ret void +} + +; Safe to use the unextended address here +define void @test_indirect_safe_tail_call(i32* %weird_funcs) { +; CHECK-LABEL: test_indirect_safe_tail_call: +; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 +; CHECK-OPT-NOT: ubfx +; CHECK-OPT: br x[[ADDR32]] + %addr = getelementptr i32, i32* %weird_funcs, i32 1 + %func = bitcast i32* %addr to void()* + tail call void() %func() + ret void +} + +; For the "armv7k" slice, Clang will be emitting some small structs as [N x +; i32]. For ABI compatibility with arm64_32 these need to be passed in *X* +; registers (e.g. [2 x i32] would be packed into a single register). + +define i32 @test_in_smallstruct_low([3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_low: +; CHECK: mov x0, x1 + %val = extractvalue [3 x i32] %in, 2 + ret i32 %val +} + +define i32 @test_in_smallstruct_high([3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_high: +; CHECK: lsr x0, x0, #32 + %val = extractvalue [3 x i32] %in, 1 + ret i32 %val +} + +; The 64-bit DarwinPCS ABI has the quirk that structs on the stack are always +; 64-bit aligned. This must not happen for arm64_32 since othwerwise va_arg will +; be incompatible with the armv7k ABI. +define i32 @test_in_smallstruct_stack([8 x i64], i32, [3 x i32] %in) { +; CHECK-LABEL: test_in_smallstruct_stack: +; CHECK: ldr w0, [sp, #4] + %val = extractvalue [3 x i32] %in, 0 + ret i32 %val +} + +define [2 x i32] @test_ret_smallstruct([3 x i32] %in) { +; CHECK-LABEL: test_ret_smallstruct: +; CHECK: mov x0, #1 +; CHECK: movk x0, #2, lsl #32 + + ret [2 x i32] [i32 1, i32 2] +} + +declare void @smallstruct_callee([4 x i32]) +define void @test_call_smallstruct() { +; CHECK-LABEL: test_call_smallstruct: +; CHECK: mov x0, #1 +; CHECK: movk x0, #2, lsl #32 +; CHECK: mov x1, #3 +; CHECK: movk x1, #4, lsl #32 +; CHECK: bl _smallstruct_callee + + call void @smallstruct_callee([4 x i32] [i32 1, i32 2, i32 3, i32 4]) + ret void +} + +declare void @smallstruct_callee_stack([8 x i64], i32, [2 x i32]) +define void @test_call_smallstruct_stack() { +; CHECK-LABEL: test_call_smallstruct_stack: +; CHECK: mov [[VAL:x[0-9]+]], #1 +; CHECK: movk [[VAL]], #2, lsl #32 +; CHECK: stur [[VAL]], [sp, #4] + + call void @smallstruct_callee_stack([8 x i64] undef, i32 undef, [2 x i32] [i32 1, i32 2]) + ret void +} + +declare [3 x i32] @returns_smallstruct() +define i32 @test_use_smallstruct_low() { +; CHECK-LABEL: test_use_smallstruct_low: +; CHECK: bl _returns_smallstruct +; CHECK: mov x0, x1 + + %struct = call [3 x i32] @returns_smallstruct() + %val = extractvalue [3 x i32] %struct, 2 + ret i32 %val +} + +define i32 @test_use_smallstruct_high() { +; CHECK-LABEL: test_use_smallstruct_high: +; CHECK: bl _returns_smallstruct +; CHECK: lsr x0, x0, #32 + + %struct = call [3 x i32] @returns_smallstruct() + %val = extractvalue [3 x i32] %struct, 1 + ret i32 %val +} + +; If a small struct can't be allocated to x0-x7, the remaining registers should +; be marked as unavailable and subsequent GPR arguments should also be on the +; stack. Obviously the struct itself should be passed entirely on the stack. +define i32 @test_smallstruct_padding([7 x i64], [4 x i32] %struct, i32 %in) { +; CHECK-LABEL: test_smallstruct_padding: +; CHECK-DAG: ldr [[IN:w[0-9]+]], [sp, #16] +; CHECK-DAG: ldr [[LHS:w[0-9]+]], [sp] +; CHECK: add w0, [[LHS]], [[IN]] + %lhs = extractvalue [4 x i32] %struct, 0 + %sum = add i32 %lhs, %in + ret i32 %sum +} + +declare void @take_small_smallstruct(i64, [1 x i32]) +define void @test_small_smallstruct() { +; CHECK-LABEL: test_small_smallstruct: +; CHECK-DAG: mov w0, #1 +; CHECK-DAG: mov w1, #2 +; CHECK: bl _take_small_smallstruct + call void @take_small_smallstruct(i64 1, [1 x i32] [i32 2]) + ret void +} + +define void @test_bare_frameaddr(i8** %addr) { +; CHECK-LABEL: test_bare_frameaddr: +; CHECK: add x[[LOCAL:[0-9]+]], sp, #{{[0-9]+}} +; CHECK: str w[[LOCAL]], + + %ptr = alloca i8 + store i8* %ptr, i8** %addr, align 4 + ret void +} + +define void @test_sret_use([8 x i64]* sret %out) { +; CHECK-LABEL: test_sret_use: +; CHECK: str xzr, [x8] + %addr = getelementptr [8 x i64], [8 x i64]* %out, i32 0, i32 0 + store i64 0, i64* %addr + ret void +} + +define i64 @test_sret_call() { +; CHECK-LABEL: test_sret_call: +; CHECK: mov x8, sp +; CHECK: bl _test_sret_use + %arr = alloca [8 x i64] + call void @test_sret_use([8 x i64]* sret %arr) + + %addr = getelementptr [8 x i64], [8 x i64]* %arr, i32 0, i32 0 + %val = load i64, i64* %addr + ret i64 %val +} + +define double @test_constpool() { +; CHECK-LABEL: test_constpool: +; CHECK: adrp x[[PAGE:[0-9]+]], [[POOL:lCPI[0-9]+_[0-9]+]]@PAGE +; CHECK: ldr d0, [x[[PAGE]], [[POOL]]@PAGEOFF] + ret double 1.0e-6 +} + +define i8* @test_blockaddress() { +; CHECK-LABEL: test_blockaddress: +; CHECK: [[BLOCK:Ltmp[0-9]+]]: +; CHECK: adrp [[PAGE:x[0-9]+]], [[BLOCK]]@PAGE +; CHECK: add x0, [[PAGE]], [[BLOCK]]@PAGEOFF + br label %dest +dest: + ret i8* blockaddress(@test_blockaddress, %dest) +} + +define i8* @test_indirectbr(i8* %dest) { +; CHECK-LABEL: test_indirectbr: +; CHECK: br x0 + indirectbr i8* %dest, [label %true, label %false] + +true: + ret i8* blockaddress(@test_indirectbr, %true) +false: + ret i8* blockaddress(@test_indirectbr, %false) +} + +; ISelDAGToDAG tries to fold an offset FI load (in this case var+4) into the +; actual load instruction. This needs to be done slightly carefully since we +; claim the FI in the process -- it doesn't need extending. +define float @test_frameindex_offset_load() { +; CHECK-LABEL: test_frameindex_offset_load: +; CHECK: ldr s0, [sp, #4] + %arr = alloca float, i32 4, align 8 + %addr = getelementptr inbounds float, float* %arr, i32 1 + + %val = load float, float* %addr, align 4 + ret float %val +} + +define void @test_unaligned_frameindex_offset_store() { +; CHECK-LABEL: test_unaligned_frameindex_offset_store: +; CHECK: mov x[[TMP:[0-9]+]], sp +; CHECK: orr w[[ADDR:[0-9]+]], w[[TMP]], #0x2 +; CHECK: mov [[VAL:w[0-9]+]], #42 +; CHECK: str [[VAL]], [x[[ADDR]]] + %arr = alloca [4 x i32] + + %addr.int = ptrtoint [4 x i32]* %arr to i32 + %addr.nextint = add nuw i32 %addr.int, 2 + %addr.next = inttoptr i32 %addr.nextint to i32* + store i32 42, i32* %addr.next + ret void +} + + +define {i64, i64*} @test_pre_idx(i64* %addr) { +; CHECK-LABEL: test_pre_idx: + +; CHECK: add w[[ADDR:[0-9]+]], w0, #8 +; CHECK: ldr x0, [x[[ADDR]]] + %addr.int = ptrtoint i64* %addr to i32 + %addr.next.int = add nuw i32 %addr.int, 8 + %addr.next = inttoptr i32 %addr.next.int to i64* + %val = load i64, i64* %addr.next + + %tmp = insertvalue {i64, i64*} undef, i64 %val, 0 + %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1 + + ret {i64, i64*} %res +} + +; Forming a post-indexed load is invalid here since the GEP needs to work when +; %addr wraps round to 0. +define {i64, i64*} @test_invalid_pre_idx(i64* %addr) { +; CHECK-LABEL: test_invalid_pre_idx: +; CHECK: add w1, w0, #8 +; CHECK: ldr x0, [x1] + %addr.next = getelementptr i64, i64* %addr, i32 1 + %val = load i64, i64* %addr.next + + %tmp = insertvalue {i64, i64*} undef, i64 %val, 0 + %res = insertvalue {i64, i64*} %tmp, i64* %addr.next, 1 + + ret {i64, i64*} %res +} + +declare void @callee([8 x i32]*) +define void @test_stack_guard() ssp { +; CHECK-LABEL: test_stack_guard: +; CHECK: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE +; CHECK: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] +; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] +; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]] + +; CHECK: add x0, sp, #{{[0-9]+}} +; CHECK: bl _callee + +; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE +; CHECK-OPT: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] +; CHECK-OPT: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] +; CHECK-OPT: ldur [[NEW_VAL:w[0-9]+]], [x29, #[[GUARD_OFFSET]]] +; CHECK-OPT: cmp [[GUARD_VAL]], [[NEW_VAL]] +; CHECK-OPT: b.ne [[FAIL:LBB[0-9]+_[0-9]+]] + +; CHECK-OPT: [[FAIL]]: +; CHECK-OPT-NEXT: bl ___stack_chk_fail + %arr = alloca [8 x i32] + call void @callee([8 x i32]* %arr) + ret void +} + +declare i32 @__gxx_personality_v0(...) +declare void @eat_landingpad_args(i32, i8*, i32) +@_ZTI8Whatever = external global i8 +define void @test_landingpad_marshalling() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: test_landingpad_marshalling: +; CHECK: mov w2, w1 +; CHECK: mov x1, x0 +; CHECK: bl _eat_landingpad_args + invoke void @callee([8 x i32]* undef) to label %done unwind label %lpad + +lpad: ; preds = %entry + %exc = landingpad { i8*, i32 } + catch i8* @_ZTI8Whatever + %pointer = extractvalue { i8*, i32 } %exc, 0 + %selector = extractvalue { i8*, i32 } %exc, 1 + call void @eat_landingpad_args(i32 undef, i8* %pointer, i32 %selector) + ret void + +done: + ret void +} + +define void @test_dynamic_stackalloc() { +; CHECK-LABEL: test_dynamic_stackalloc: +; CHECK: sub [[REG:x[0-9]+]], sp, #32 +; CHECK: mov sp, [[REG]] +; CHECK-OPT-NOT: ubfx +; CHECK: bl _callee + br label %next + +next: + %val = alloca [8 x i32] + call void @callee([8 x i32]* %val) + ret void +} + +define void @test_asm_memory(i32* %base.addr) { +; CHECK-LABEL: test_asm_memory: +; CHECK: add w[[ADDR:[0-9]+]], w0, #4 +; CHECK: str wzr, [x[[ADDR]] + %addr = getelementptr i32, i32* %base.addr, i32 1 + call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + ret void +} + +define void @test_unsafe_asm_memory(i64 %val) { +; CHECK-LABEL: test_unsafe_asm_memory: +; CHECK: and x[[ADDR:[0-9]+]], x0, #0xffffffff +; CHECK: str wzr, [x[[ADDR]]] + %addr_int = trunc i64 %val to i32 + %addr = inttoptr i32 %addr_int to i32* + call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + ret void +} + +define [9 x i8*] @test_demoted_return(i8* %in) { +; CHECK-LABEL: test_demoted_return: +; CHECK: str w0, [x8, #32] + %res = insertvalue [9 x i8*] undef, i8* %in, 8 + ret [9 x i8*] %res +} + +define i8* @test_inttoptr(i64 %in) { +; CHECK-LABEL: test_inttoptr: +; CHECK: and x0, x0, #0xffffffff + %res = inttoptr i64 %in to i8* + ret i8* %res +} + +declare i32 @llvm.get.dynamic.area.offset.i32() +define i32 @test_dynamic_area() { +; CHECK-LABEL: test_dynamic_area: +; CHECK: mov w0, wzr + %res = call i32 @llvm.get.dynamic.area.offset.i32() + ret i32 %res +} + +define void @test_pointer_vec_store(<2 x i8*>* %addr) { +; CHECK-LABEL: test_pointer_vec_store: +; CHECK: str xzr, [x0] +; CHECK-NOT: str +; CHECK-NOT: stp + + store <2 x i8*> zeroinitializer, <2 x i8*>* %addr, align 16 + ret void +} + +define <2 x i8*> @test_pointer_vec_load(<2 x i8*>* %addr) { +; CHECK-LABEL: test_pointer_vec_load: +; CHECK: ldr d[[TMP:[0-9]+]], [x0] +; CHECK: ushll.2d v0, v[[TMP]], #0 + %val = load <2 x i8*>, <2 x i8*>* %addr, align 16 + ret <2 x i8*> %val +} + +define void @test_inline_asm_mem_pointer(i32* %in) { +; CHECK-LABEL: test_inline_asm_mem_pointer: +; CHECK: str w0, + tail call void asm sideeffect "ldr x0, $0", "rm"(i32* %in) + ret void +} + + +define void @test_struct_hi(i32 %hi) nounwind { +; CHECK-LABEL: test_struct_hi: +; CHECK: mov w[[IN:[0-9]+]], w0 +; CHECK: bl _get_int +; CHECK-NEXT: bfi x0, x[[IN]], #32, #32 +; CHECK-NEXT: bl _take_pair + %val.64 = call i64 @get_int() + %val.32 = trunc i64 %val.64 to i32 + + %pair.0 = insertvalue [2 x i32] undef, i32 %val.32, 0 + %pair.1 = insertvalue [2 x i32] %pair.0, i32 %hi, 1 + call void @take_pair([2 x i32] %pair.1) + + ret void +} +declare void @take_pair([2 x i32]) +declare i64 @get_int() + +define i1 @test_icmp_ptr(i8* %in) { +; CHECK-LABEL: test_icmp_ptr +; CHECK: ubfx x0, x0, #31, #1 + %res = icmp slt i8* %in, null + ret i1 %res +} + +define void @test_multiple_icmp_ptr(i8* %l, i8* %r) { +; CHECK-LABEL: test_multiple_icmp_ptr: +; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]] +; CHECK: tbnz w1, #31, [[FALSEBB]] + %tst1 = icmp sgt i8* %l, inttoptr (i32 -1 to i8*) + %tst2 = icmp sgt i8* %r, inttoptr (i32 -1 to i8*) + %tst = and i1 %tst1, %tst2 + br i1 %tst, label %true, label %false + +true: + call void(...) @bar() + ret void + +false: + ret void +} + +define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) { +; CHECK-LABEL: test_gep_nonpow2: +; CHECK: mov w[[SIZE:[0-9]+]], #18 +; CHECK-NEXT: smaddl x0, w1, w[[SIZE]], x0 +; CHECK-NEXT: ret + %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1 + ret { [18 x i8] }* %tmp0 +} + +define void @test_bzero(i64 %in) { +; CHECK-LABEL: test_bzero: +; CHECK-DAG: lsr x1, x0, #32 +; CHECK-DAG: and x0, x0, #0xffffffff +; CHECK: bl _bzero + + %ptr.i32 = trunc i64 %in to i32 + %size.64 = lshr i64 %in, 32 + %size = trunc i64 %size.64 to i32 + %ptr = inttoptr i32 %ptr.i32 to i8* + tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 0, i32 %size, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) Index: llvm/test/CodeGen/AArch64/fastcc-reserved.ll =================================================================== --- llvm/test/CodeGen/AArch64/fastcc-reserved.ll +++ llvm/test/CodeGen/AArch64/fastcc-reserved.ll @@ -4,7 +4,7 @@ ; call-frame is not reserved (hence disable-fp-elim), but where ; callee-pop can occur (hence tailcallopt). -declare fastcc void @will_pop([8 x i32], i32 %val) +declare fastcc void @will_pop([8 x i64], i32 %val) define fastcc void @foo(i32 %in) { ; CHECK-LABEL: foo: @@ -18,7 +18,7 @@ ; Reserve space for call-frame: ; CHECK: str w{{[0-9]+}}, [sp, #-16]! - call fastcc void @will_pop([8 x i32] undef, i32 42) + call fastcc void @will_pop([8 x i64] undef, i32 42) ; CHECK: bl will_pop ; Since @will_pop is fastcc with tailcallopt, it will put the stack @@ -31,7 +31,7 @@ ret void } -declare void @wont_pop([8 x i32], i32 %val) +declare void @wont_pop([8 x i64], i32 %val) define void @foo1(i32 %in) { ; CHECK-LABEL: foo1: @@ -44,7 +44,7 @@ ; Reserve space for call-frame ; CHECK: str w{{[0-9]+}}, [sp, #-16]! - call void @wont_pop([8 x i32] undef, i32 42) + call void @wont_pop([8 x i64] undef, i32 42) ; CHECK: bl wont_pop ; This time we *do* need to unreserve the call-frame Index: llvm/test/CodeGen/AArch64/fastcc.ll =================================================================== --- llvm/test/CodeGen/AArch64/fastcc.ll +++ llvm/test/CodeGen/AArch64/fastcc.ll @@ -18,7 +18,7 @@ ; CHECK-TAIL: str w{{[0-9]+}}, [sp] - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -28,7 +28,7 @@ ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -56,7 +56,7 @@ ; CHECK-TAIL-NEXT: ret } -define fastcc void @func_stack8([8 x i32], i32 %stacked) { +define fastcc void @func_stack8([8 x i64], i32 %stacked) { ; CHECK-LABEL: func_stack8: ; CHECK: sub sp, sp, #48 ; CHECK: stp x29, x30, [sp, #32] @@ -71,7 +71,7 @@ ; CHECK-TAIL: str w{{[0-9]+}}, [sp] - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -82,7 +82,7 @@ ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -109,7 +109,7 @@ ; CHECK-TAIL-NEXT: ret } -define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32: ; CHECK: add x29, sp, #32 @@ -117,7 +117,7 @@ ; CHECK-TAIL: add x29, sp, #32 - call fastcc void @func_stack8([8 x i32] undef, i32 42) + call fastcc void @func_stack8([8 x i64] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, ; CHECK-NOT: [sp, #{{[-0-9]+}}]! @@ -127,7 +127,7 @@ ; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) + call fastcc void @func_stack32([8 x i64] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, @@ -155,7 +155,7 @@ } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. -define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf: ; CHECK: str x20, [sp, #-16]! ; CHECK: nop @@ -186,7 +186,7 @@ } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. -define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf_local([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf_local: ; CHECK: sub sp, sp, #32 ; CHECK-NEXT: str x20, [sp, #16] @@ -222,7 +222,7 @@ } ; Check that arg stack pop is done after callee-save restore when no frame pointer is used. -define fastcc void @func_stack32_leaf_local_nocs([8 x i32], i128 %stacked0, i128 %stacked1) { +define fastcc void @func_stack32_leaf_local_nocs([8 x i64], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32_leaf_local_nocs: ; CHECK: sub sp, sp, #16 ; CHECK: add sp, sp, #16 Index: llvm/test/CodeGen/AArch64/jump-table-32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/jump-table-32.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s + +define i32 @test_jumptable(i32 %in) { +; CHECK: test_jumptable + + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] +; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE +; CHECK: mov w[[INDEX:[0-9]+]], w0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF +; CHECK: adr [[BASE_BLOCK:x[0-9]+]], LBB0_2 +; CHECK: ldrb w[[OFFSET:[0-9]+]], [x[[JT]], x[[INDEX]]] +; CHECK: add [[DEST:x[0-9]+]], [[BASE_BLOCK]], x[[OFFSET]], lsl #2 +; CHECK: br [[DEST]] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + ret i32 8 + +} + +; CHECK: LJTI0_0: +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte Index: llvm/test/CodeGen/AArch64/or-combine.ll =================================================================== --- llvm/test/CodeGen/AArch64/or-combine.ll +++ llvm/test/CodeGen/AArch64/or-combine.ll @@ -28,9 +28,9 @@ ; are used more than once. define [3 x i32] @test_reuse(i32 %in, i32 %mask1, i32 %mask2) { ; CHECK-LABEL: test_reuse: -; CHECK-DAG: and w1, w0, w1 -; CHECK-DAG: and w2, w0, w2 -; CHECK-DAG: orr w0, w1, w2 +; CHECK-DAG: and [[LO:w[0-9]+]], w0, w1 +; CHECK-DAG: and [[HI:w[0-9]+]], w0, w2 +; CHECK-DAG: orr w0, [[LO]], [[HI]] %lo = and i32 %in, %mask1 %hi = and i32 %in, %mask2 Index: llvm/test/CodeGen/AArch64/sibling-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/sibling-call.ll +++ llvm/test/CodeGen/AArch64/sibling-call.ll @@ -1,8 +1,8 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-ldst-opt=0 | FileCheck %s declare void @callee_stack0() -declare void @callee_stack8([8 x i32], i64) -declare void @callee_stack16([8 x i32], i64, i64) +declare void @callee_stack8([8 x i64], i64) +declare void @callee_stack16([8 x i64], i64, i64) define void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: @@ -12,7 +12,7 @@ ; CHECK-NEXT: b callee_stack0 } -define void @caller_to0_from8([8 x i32], i64) nounwind{ +define void @caller_to0_from8([8 x i64], i64) nounwind{ ; CHECK-LABEL: caller_to0_from8: ; CHECK-NEXT: // %bb. @@ -26,51 +26,51 @@ ; Caller isn't going to clean up any extra stack we allocate, so it ; can't be a tail call. - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: bl callee_stack8 } -define void @caller_to8_from8([8 x i32], i64 %a) { +define void @caller_to8_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK-NOT: sub sp, sp, ; This should reuse our stack area for the 42 - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp] ; CHECK-NEXT: b callee_stack8 } -define void @caller_to16_from8([8 x i32], i64 %a) { +define void @caller_to16_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; Shouldn't be a tail call: we can't use SP+8 because our caller might ; have something there. This may sound obvious but implementation does ; some funky aligning. - tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef) + tail call void @callee_stack16([8 x i64] undef, i64 undef, i64 undef) ; CHECK: bl callee_stack16 ret void } -define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +define void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK-NOT: sub sp, sp ; Reuse our area, putting "42" at incoming sp - tail call void @callee_stack8([8 x i32] undef, i64 42) + tail call void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp] ; CHECK-NEXT: b callee_stack8 } -define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +define void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { ; CHECK-LABEL: caller_to16_from16: ; CHECK-NOT: sub sp, sp, ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. - tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + tail call void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) ret void ; CHECK: ldr [[VAL0:x[0-9]+]], Index: llvm/test/CodeGen/AArch64/swift-return.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-return.ll +++ llvm/test/CodeGen/AArch64/swift-return.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 ; CHECK-LABEL: test1 ; CHECK: bl _gen @@ -8,7 +10,7 @@ ; CHECK-O0-LABEL: test1 ; CHECK-O0: bl _gen ; CHECK-O0: sxth [[TMP:w.*]], w0 -; CHECK-O0: add w8, [[TMP]], w1, sxtb +; CHECK-O0: add {{w[0-9]+}}, [[TMP]], w1, sxtb define i16 @test1(i32) { entry: %call = call swiftcc { i16, i8 } @gen(i32 %0) Index: llvm/test/CodeGen/AArch64/swiftcc.ll =================================================================== --- llvm/test/CodeGen/AArch64/swiftcc.ll +++ llvm/test/CodeGen/AArch64/swiftcc.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s ; CHECK: t1 ; CHECK: fadd s0, s0, s1 Index: llvm/test/CodeGen/AArch64/swifterror.ll =================================================================== --- llvm/test/CodeGen/AArch64/swifterror.ll +++ llvm/test/CodeGen/AArch64/swifterror.ll @@ -1,5 +1,7 @@ -; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE %s -; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-AARCH64 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-AARCH64 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-ARM64_32 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-ARM64_32 %s declare i8* @malloc(i64) declare void @free(i8*) @@ -40,7 +42,8 @@ ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo ; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w0 ; Access part of the error object and save it to error_ref ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] @@ -50,7 +53,8 @@ ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[ID:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 entry: %error_ptr_ref = alloca swifterror %swift_error* store %swift_error* null, %swift_error** %error_ptr_ref @@ -76,7 +80,8 @@ ; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w21 ; CHECK-APPLE: fcmp s0, [[CMP]] ; CHECK-APPLE: b.le ; Access part of the error object and save it to error_ref @@ -89,7 +94,8 @@ ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[ID:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 entry: %error_ptr_ref = alloca swifterror %swift_error* br label %bb_loop @@ -171,29 +177,53 @@ ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE: ret -; CHECK-O0-LABEL: foo_loop: +; CHECK-O0-AARCH64-LABEL: foo_loop: ; spill x21 -; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]] -; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]] -; CHECK-O0: [[BB1]]: -; CHECK-O0: ldr x0, [sp, [[SLOT]]] -; CHECK-O0: str x0, [sp, [[SLOT2:#[0-9]+]]] -; CHECK-O0: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] -; CHECK-O0: mov w{{.*}}, #16 -; CHECK-O0: malloc -; CHECK-O0: mov [[ID:x[0-9]+]], x0 -; CHECK-O0: strb w{{.*}}, [{{.*}}[[ID]], #8] +; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]] +; CHECK-O0-AARCH64: [[BB1]]: +; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2:#[0-9]+]]] +; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] +; CHECK-O0-AARCH64: mov w{{.*}}, #16 +; CHECK-O0-AARCH64: malloc +; CHECK-O0-AARCH64: mov [[ID:x[0-9]+]], x0 +; CHECK-O0-AARCH64: strb w{{.*}}, [{{.*}}[[ID]], #8] ; spill x0 -; CHECK-O0: str x0, [sp, [[SLOT2]]] -; CHECK-O0:[[BB2]]: -; CHECK-O0: ldr x0, [sp, [[SLOT2]]] -; CHECK-O0: fcmp -; CHECK-O0: str x0, [sp, [[SLOT3:#[0-9]+]] -; CHECK-O0: b.le [[BB1]] +; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2]]] +; CHECK-O0-AARCH64:[[BB2]]: +; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT2]]] +; CHECK-O0-AARCH64: fcmp +; CHECK-O0-AARCH64: str x0, [sp, [[SLOT3:#[0-9]+]] +; CHECK-O0-AARCH64: b.le [[BB1]] ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]] -; CHECK-O0: mov x21, [[ID3]] -; CHECK-O0: ret +; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]] +; CHECK-O0-AARCH64: mov x21, [[ID3]] +; CHECK-O0-AARCH64: ret + +; CHECK-O0-ARM64_32-LABEL: foo_loop: +; spill x21 +; CHECK-O0-ARM64_32: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-ARM64_32: b [[BB1:[A-Za-z0-9_]*]] +; CHECK-O0-ARM64_32: [[BB1]]: +; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-ARM64_32: str x0, [sp, [[SLOT2:#[0-9]+]]] +; CHECK-O0-ARM64_32: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] +; CHECK-O0-ARM64_32: mov w{{.*}}, #16 +; CHECK-O0-ARM64_32: malloc +; CHECK-O0-ARM64_32: strb w{{.*}}, +; spill x0 +; CHECK-O0-ARM64_32: str [[ID2]], [sp, [[SLOT2]]] +; CHECK-O0-ARM64_32:[[BB2]]: +; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT2]]] +; CHECK-O0-ARM64_32: fcmp +; CHECK-O0-ARM64_32: str x0, [sp] +; CHECK-O0-ARM64_32: b.le [[BB1]] +; reload from stack +; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp] +; CHECK-O0-ARM64_32: mov x21, [[ID3]] +; CHECK-O0-ARM64_32: ret + entry: br label %bb_loop @@ -263,7 +293,8 @@ ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret ; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: cbnz x21 +; CHECK-APPLE-ARM64_32: cbnz w0 ; Access part of the error object and save it to error_ref ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] @@ -275,7 +306,8 @@ ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:x[0-9]+]], x21 -; CHECK-O0: cbnz x21 +; CHECK-O0-AARCH64: cbnz x21 +; CHECK-O0-ARM64_32: cmp x21, #0 ; Access part of the error object and save it to error_ref ; reload from stack ; CHECK-O0: ldrb [[CODE:w[0-9]+]] @@ -308,20 +340,22 @@ ; CHECK-APPLE-LABEL: foo_vararg: ; CHECK-APPLE: mov w0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE-DAG: mov [[ID:w[0-9]+]], #1 -; CHECK-APPLE-DAG: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 -; CHECK-APPLE-DAG: strb [[ID]], [x0, #8] ; First vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16] +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] +; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16 +; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8] ; Second vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] -; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16 +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] ; Third vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] + +; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 +; CHECK-APPLE-ARM64_32: strb [[ID]], [x0, #8] + -; CHECK-APPLE: mov x21, x0 -; CHECK-APPLE-NOT: x21 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -349,18 +383,18 @@ define float @caller4(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller4: -; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 -; CHECK-APPLE: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] -; CHECK-APPLE: str {{x[0-9]+}}, [sp] +; CHECK-APPLE-AARCH64: mov [[ID:x[0-9]+]], x0 +; CHECK-APPLE-AARCH64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK-APPLE-AARCH64: str {{x[0-9]+}}, [sp] -; CHECK-APPLE: mov x21, xzr -; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: mov x0, x21 -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE-AARCH64: mov x21, xzr +; CHECK-APPLE-AARCH64: bl {{.*}}foo_vararg +; CHECK-APPLE-AARCH64: mov x0, x21 +; CHECK-APPLE-AARCH64: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] -; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: bl {{.*}}free +; CHECK-APPLE-AARCH64: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK-APPLE-AARCH64: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK-APPLE-AARCH64: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* store %swift_error* null, %swift_error** %error_ptr_ref Index: llvm/test/CodeGen/AArch64/swiftself.ll =================================================================== --- llvm/test/CodeGen/AArch64/swiftself.ll +++ llvm/test/CodeGen/AArch64/swiftself.ll @@ -1,6 +1,7 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTAARCH64 %s +; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT --check-prefix=OPTARM64_32 %s ; Parameter with swiftself should be allocated to x20. ; CHECK-LABEL: swiftself_param: @@ -47,8 +48,11 @@ ; We can use a tail call if the callee swiftself is the same as the caller one. ; CHECK-LABEL: swiftself_tail: -; OPT: b {{_?}}swiftself_param -; OPT-NOT: ret +; OPTAARCH64: b {{_?}}swiftself_param +; OPTAARCH64-NOT: ret + +; OPTARM64_32: bl {{_?}}swiftself_param +; OPTARM64_32: ret define i8* @swiftself_tail(i8* swiftself %addr0) { call void asm sideeffect "", "~{x20}"() %res = tail call i8* @swiftself_param(i8* swiftself %addr0) @@ -70,12 +74,19 @@ ; we normally would. We marked the first parameter with swiftself which means it ; will no longer be passed in x0. declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself) -; OPT-LABEL: swiftself_nothisreturn: -; OPT-DAG: ldr x20, [x20] -; OPT-DAG: mov [[CSREG:x[1-9].*]], x8 -; OPT: bl {{_?}}thisreturn_attribute -; OPT: str x0, {{\[}}[[CSREG]] -; OPT: ret +; OPTAARCH64-LABEL: swiftself_nothisreturn: +; OPTAARCH64-DAG: ldr x20, [x20] +; OPTAARCH64-DAG: mov [[CSREG:x[1-9].*]], x8 +; OPTAARCH64: bl {{_?}}thisreturn_attribute +; OPTAARCH64: str x0, {{\[}}[[CSREG]] +; OPTAARCH64: ret + +; OPTARM64_32-LABEL: swiftself_nothisreturn: +; OPTARM64_32-DAG: ldr w20, [x20] +; OPTARM64_32-DAG: mov [[CSREG:x[1-9].*]], x8 +; OPTARM64_32: bl {{_?}}thisreturn_attribute +; OPTARM64_32: str w0, {{\[}}[[CSREG]] +; OPTARM64_32: ret define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) { entry: %2 = load i8*, i8** %1, align 8 Index: llvm/test/CodeGen/AArch64/tail-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/tail-call.ll +++ llvm/test/CodeGen/AArch64/tail-call.ll @@ -1,8 +1,8 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s declare fastcc void @callee_stack0() -declare fastcc void @callee_stack8([8 x i32], i64) -declare fastcc void @callee_stack16([8 x i32], i64, i64) +declare fastcc void @callee_stack8([8 x i64], i64) +declare fastcc void @callee_stack16([8 x i64], i64, i64) declare extern_weak fastcc void @callee_weak() define fastcc void @caller_to0_from0() nounwind { @@ -15,7 +15,7 @@ ; CHECK-NEXT: b callee_stack0 } -define fastcc void @caller_to0_from8([8 x i32], i64) { +define fastcc void @caller_to0_from8([8 x i64], i64) { ; CHECK-LABEL: caller_to0_from8: tail call fastcc void @callee_stack0() @@ -31,33 +31,33 @@ ; Key point is that the "42" should go #16 below incoming stack ; pointer (we didn't have arg space to reuse). - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 } -define fastcc void @caller_to8_from8([8 x i32], i64 %a) { +define fastcc void @caller_to8_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK: sub sp, sp, #16 ; Key point is that the "%a" should go where at SP on entry. - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 } -define fastcc void @caller_to16_from8([8 x i32], i64 %a) { +define fastcc void @caller_to16_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; CHECK: sub sp, sp, #16 ; Important point is that the call reuses the "dead" argument space ; above %a on the stack. If it tries to go below incoming-SP then the ; callee will not deallocate the space, even in fastcc. - tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) + tail call fastcc void @callee_stack16([8 x i64] undef, i64 42, i64 2) ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 @@ -65,12 +65,12 @@ } -define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +define fastcc void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK: sub sp, sp, #16 ; Key point is that the "%a" should go where at #16 above SP on entry. - tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + tail call fastcc void @callee_stack8([8 x i64] undef, i64 42) ret void ; CHECK: str {{x[0-9]+}}, [sp, #32]! @@ -78,13 +78,13 @@ } -define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +define fastcc void @caller_to16_from16([8 x i64], i64 %a, i64 %b) { ; CHECK-LABEL: caller_to16_from16: ; CHECK: sub sp, sp, #16 ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. - tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + tail call fastcc void @callee_stack16([8 x i64] undef, i64 %b, i64 %a) ret void ; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] Index: llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -27,8 +27,8 @@ ; AARCH-NEXT: orr w10, w10, w11 ; AARCH-NEXT: orr w9, w10, w9 ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: mov x1, x8 -; AARCH-NEXT: mov w2, w9 +; AARCH-DAG: mov x1, x8 +; AARCH-DAG: mov w2, w9 ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 Index: llvm/test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- llvm/test/CodeGen/AArch64/win64_vararg.ll +++ llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -256,17 +256,19 @@ ret i32 %12 } + ; Osceola: shitty upstream test is just a copy/paste job so I'm certainly not + ; going to put more effort in to make it work here. ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 ; CHECK-DAG: mov w6, w3 ; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 -; CHECK: mov w2, w1 -; CHECK: str w4, [sp] -; CHECK: fmov x1, d0 -; CHECK: fmov x3, d1 -; CHECK: fmov x5, d2 -; CHECK: fmov x7, d3 -; CHECK: mov w4, [[REG1]] +; CHECK-DAG: mov w2, w1 +; CHECK-DAG: str w4, [sp] +; CHECK-DAG: fmov x{{.*}}, d0 +; CHECK-DAG: fmov x{{.*}}, d1 +; CHECK-DAG: fmov x{{.*}}, d2 +; CHECK-DAG: fmov x{{.*}}, d3 +; CHECK-DAG: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs Index: llvm/test/MC/AArch64/arm64_32-compact-unwind.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/arm64_32-compact-unwind.s @@ -0,0 +1,15 @@ +; RUN: llvm-mc -triple=arm64_32-ios7.0 -filetype=obj %s -o %t +; RUN: llvm-objdump -s %t | FileCheck %s + +; The compact unwind format in ILP32 mode is pretty much the same, except +; references to addresses (function, personality, LSDA) are pointer-sized. + +; CHECK: Contents of section __compact_unwind: +; CHECK-NEXT: 0004 00000000 04000000 00000002 00000000 +; CHECK-NEXT: 0014 00000000 + .globl _test_compact_unwind + .align 2 +_test_compact_unwind: + .cfi_startproc + ret + .cfi_endproc Index: llvm/utils/TableGen/CallingConvEmitter.cpp =================================================================== --- llvm/utils/TableGen/CallingConvEmitter.cpp +++ llvm/utils/TableGen/CallingConvEmitter.cpp @@ -264,6 +264,10 @@ Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; O << IndentStr << "LocInfo = CCValAssign::BCvt;\n"; + } else if (Action->isSubClassOf("CCTruncToType")) { + Record *DestTy = Action->getValueAsDef("DestTy"); + O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; + O << IndentStr << "LocInfo = CCValAssign::Trunc;\n"; } else if (Action->isSubClassOf("CCPassIndirect")) { Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";