Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -1272,6 +1272,7 @@ // Add 's' bit operand (always reg0 for this) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(MI->getOperand(0).getReg())); return; @@ -1892,6 +1893,7 @@ .addImm(ARMCC::AL) .addReg(0)); + assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) .addReg(ScratchReg) // Predicate. Index: lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- lib/Target/ARM/ARMCallLowering.cpp +++ lib/Target/ARM/ARMCallLowering.cpp @@ -247,7 +247,9 @@ const Value *Val, unsigned VReg) const { assert(!Val == !VReg && "Return value without a vreg"); - auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL)); + auto const &ST = MIRBuilder.getMF().getSubtarget(); + unsigned Opcode = ST.getReturnOpcode(); + auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL)); if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret)) return false; Index: lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1023,8 +1023,11 @@ if (STI->isThumb()) MIB.add(predOps(ARMCC::AL)); } else if (RetOpcode == ARM::TCRETURNri) { + unsigned Opcode = + STI->isThumb() ? ARM::tTAILJMPr + : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); BuildMI(MBB, MBBI, dl, - TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) + TII.get(Opcode)) .addReg(JumpTarget.getReg(), RegState::Kill); } Index: lib/Target/ARM/ARMFastISel.cpp =================================================================== --- lib/Target/ARM/ARMFastISel.cpp +++ lib/Target/ARM/ARMFastISel.cpp @@ -1332,6 +1332,8 @@ if (AddrReg == 0) return false; unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + assert(isThumb2 || Subtarget->hasV4TOps()); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(AddrReg)); @@ -2168,9 +2170,8 @@ RetRegs.push_back(VA.getLocReg()); } - unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(RetOpc)); + TII.get(Subtarget->getReturnOpcode())); AddOptionalDefs(MIB); for (unsigned R : RetRegs) MIB.addReg(R, RegState::Implicit); Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -2415,9 +2415,8 @@ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - // bx lr - Return from this function. - Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); + // Return from this function. + BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. // pop {SR0, SR1} Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -2431,7 +2431,7 @@ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (BX GPR:$dst)>, Sched<[WriteBr]>, - Requires<[IsARM]>; + Requires<[IsARM, HasV4T]>; } // Secure Monitor Call is a system instruction. @@ -5612,6 +5612,12 @@ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in + def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + // Large immediate handling. // 32-bit immediate using two piece mod_imms or movw + movt. Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp =================================================================== --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1909,6 +1909,7 @@ for (auto Use : Prev->uses()) if (Use.isKill()) { + assert(STI->hasV4TOps()); BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) .addReg(Use.getReg(), RegState::Kill) .add(predOps(ARMCC::AL)) Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -750,6 +750,17 @@ /// True if fast-isel is used. bool useFastISel() const; + + /// Returns the correct return opcode for the current feature set. + /// Use BX if available to allow mixing thumb/arm code, but fall back + /// to plain mov pc,lr on ARMv4. + unsigned getReturnOpcode() const { + if (isThumb()) + return ARM::tBX_RET; + if (hasV4TOps()) + return ARM::BX_RET; + return ARM::MOVPCLR; + } }; } // end namespace llvm Index: lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -139,9 +139,9 @@ if (TT.isThumb()) { if (ARMArchFeature.empty()) - ARMArchFeature = "+thumb-mode"; + ARMArchFeature = "+thumb-mode,+v4t"; else - ARMArchFeature += ",+thumb-mode"; + ARMArchFeature += ",+thumb-mode,+v4t"; } if (TT.isOSNaCl()) { Index: test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG define void @test_void_return() { ; CHECK-LABEL: name: test_void_return Index: test/CodeGen/ARM/armv4.ll =================================================================== --- test/CodeGen/ARM/armv4.ll +++ test/CodeGen/ARM/armv4.ll @@ -5,9 +5,24 @@ ; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB -define i32 @test(i32 %a) nounwind readnone { +define i32 @test_return(i32 %a) nounwind readnone { entry: +; ARM-LABEL: test_return ; ARM: mov pc +; THUMB-LABEL: test_return ; THUMB: bx ret i32 %a } + +@helper = global i32 ()* null, align 4 + +define i32 @test_indirect() #0 { +entry: +; ARM-LABEL: test_indirect +; ARM: mov pc +; THUMB-LABEL: test_indirect +; THUMB: bx + %0 = load i32 ()*, i32 ()** @helper, align 4 + %call = tail call i32 %0() + ret i32 %call +} Index: test/CodeGen/ARM/debug-segmented-stacks.ll =================================================================== --- test/CodeGen/ARM/debug-segmented-stacks.ll +++ test/CodeGen/ARM/debug-segmented-stacks.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux -; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -filetype=obj !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} Index: test/CodeGen/ARM/segmented-stacks-dynamic.ll =================================================================== --- test/CodeGen/ARM/segmented-stacks-dynamic.ll +++ test/CodeGen/ARM/segmented-stacks-dynamic.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android -; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux -; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj -; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -filetype=obj ; Just to prevent the alloca from being optimized away declare void @dummy_use(i32*, i32) Index: test/CodeGen/ARM/segmented-stacks.ll =================================================================== --- test/CodeGen/ARM/segmented-stacks.ll +++ test/CodeGen/ARM/segmented-stacks.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android -; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux ; We used to crash with filetype=obj ; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj