Index: lib/Target/ARM/ARMAsmPrinter.h =================================================================== --- lib/Target/ARM/ARMAsmPrinter.h +++ lib/Target/ARM/ARMAsmPrinter.h @@ -20,6 +20,7 @@ class MCOperand; class MachineConstantPool; class MachineOperand; +class MCSymbol; namespace ARM { enum DW_ISA { @@ -45,6 +46,11 @@ /// InConstantPool - Maintain state when emitting a sequence of constant /// pool entries so we can properly mark them as data regions. bool InConstantPool; + + /// ThumbIndirectPads - These maintain a per-function list of jump pad + /// labels used for ARMv4t thumb code to make register indirect calls. + SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads; + public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -120,6 +120,23 @@ // Emit the rest of the function body. EmitFunctionBody(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. + // These are created per function, rather than per TU, since it's + // relatively easy to exceed the thumb branch range within a TU. + if (! ThumbIndirectPads.empty()) { + OutStreamer.EmitAssemblerFlag(MCAF_Code16); + EmitAlignment(1); + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + OutStreamer.EmitLabel(ThumbIndirectPads[i].second); + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) + .addReg(ThumbIndirectPads[i].first) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + } + ThumbIndirectPads.clear(); + } + // We didn't modify anything. return false; } @@ -1282,18 +1299,34 @@ return; } case ARM::tBX_CALL: { - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) - .addReg(ARM::LR) - .addReg(ARM::PC) - // Add predicate operands. 
- .addImm(ARMCC::AL) - .addReg(0)); + if (Subtarget->hasV5TOps()) + llvm_unreachable("Expected BLX to be selected for v5t+"); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); + // On ARM v4t, when doing a call from thumb mode, we need to ensure + // that the saved lr has its LSB set correctly (the arch doesn't + // have blx). + // So here we generate a bl to a small jump pad that does bx rN. + // The jump pads are emitted after the function body. + + unsigned tReg = MI->getOperand(0).getReg(); + MCSymbol *TRegSym = nullptr; + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + if (ThumbIndirectPads[i].first == tReg) { + TRegSym = ThumbIndirectPads[i].second; + break; + } + } + + if (! TRegSym) { + TRegSym = OutContext.CreateTempSymbol(); + ThumbIndirectPads.push_back(std::make_pair(tReg, TRegSym)); + } + + // Create a link-saving branch to the Reg Indirect Jump Pad. + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL) + // Predicate comes first here. 
+ .addImm(ARMCC::AL).addReg(0) + .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext))); return; } case ARM::BMOVPCRX_CALL: { Index: test/CodeGen/ARM/none-macho-v4t.ll =================================================================== --- test/CodeGen/ARM/none-macho-v4t.ll +++ test/CodeGen/ARM/none-macho-v4t.ll @@ -11,11 +11,15 @@ ; CHECK: [[PC_LABEL:LPC[0-9]+_[0-9]+]]: ; CHECK-NEXT: add r[[CALLEE_STUB]], pc ; CHECK: ldr [[CALLEE:r[0-9]+]], [r[[CALLEE_STUB]]] -; CHECK: mov lr, pc -; CHECK: bx [[CALLEE]] +; CHECK-NOT: mov lr, pc +; CHECK: bl [[INDIRECT_PAD:Ltmp[0-9]+]] ; CHECK: [[LITPOOL]]: ; CHECK-NEXT: .long L_callee$non_lazy_ptr-([[PC_LABEL]]+4) + +; CHECK: [[INDIRECT_PAD]]: +; CHECK: bx [[CALLEE]] + call void @callee() ret void } Index: test/CodeGen/ARM/thumb_indirect_calls.ll =================================================================== --- test/CodeGen/ARM/thumb_indirect_calls.ll +++ test/CodeGen/ARM/thumb_indirect_calls.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=thumbv4t-eabi %s -o - | FileCheck --check-prefix=CHECK -check-prefix=CHECK-V4T %s +; RUN: llc -mtriple=thumbv5t-eabi %s -o - | FileCheck --check-prefix=CHECK -check-prefix=CHECK-V5T %s + +@f = common global void (i32)* null, align 4 + +; CHECK-LABEL: foo: +define void @foo(i32 %x) { +entry: + %0 = load void (i32)** @f, align 4 + tail call void %0(i32 %x) + ret void + +; CHECK: ldr [[TMP:r[0-3]]], [[F:\.[A-Z0-9_]+]] +; CHECK: ldr [[CALLEE:r[0-3]]], {{\[}}[[TMP]]{{\]}} + +; CHECK-V4T-NOT: blx +; CHECK-V4T: bl [[INDIRECT_PAD:\.Ltmp[0-9]+]] +; CHECK-V4T: [[F]]: +; CHECK-V4T: [[INDIRECT_PAD]]: +; CHECK-V4T-NEXT: bx [[CALLEE]] +; CHECK-V5T: blx [[CALLEE]] +} + +; CHECK-LABEL: bar: +define void @bar(void (i32)* nocapture %g, i32 %x, void (i32)* nocapture %h) { +entry: + tail call void %g(i32 %x) + tail call void %h(i32 %x) + ret void + +; CHECK-V4T: bl [[INDIRECT_PAD1:\.Ltmp[0-9]+]] +; CHECK-V4T: bl [[INDIRECT_PAD2:\.Ltmp[0-9]+]] +; CHECK-V4T: [[INDIRECT_PAD1]]: +; CHECK-V4T-NEXT: bx +; CHECK-V4T: 
[[INDIRECT_PAD2]]: +; CHECK-V4T-NEXT: bx +; CHECK-V5T: blx +; CHECK-V5T: blx +} +