Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -1709,6 +1709,89 @@ .addReg(0)); return; } + case ARM::tTBB_JT: + case ARM::tTBH_JT: { + + bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT; + unsigned Base = MI->getOperand(0).getReg(); + unsigned Idx = MI->getOperand(1).getReg(); + assert(MI->getOperand(1).isKill() && "We need the index register as scratch!"); + + // Multiply up idx if necessary. + if (!Is8Bit) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri) + .addReg(Idx) + .addReg(ARM::CPSR) + .addReg(Idx) + .addImm(1) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + + if (Base == ARM::PC) { + // TBB [base, idx] = + // ADDS idx, idx, base + // LDRB idx, [idx, #4] ; or LDRH if TBH (NOTE(review): the load built below uses immediate 6, or 3 for LDRH, not #4 -- confirm which is intended) + // LSLS idx, #1 + // ADDS pc, pc, idx + + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr) + .addReg(Idx) + .addReg(Idx) + .addReg(Base) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + + unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi; + EmitToStreamer(*OutStreamer, MCInstBuilder(Opc) + .addReg(Idx) + .addReg(Idx) + .addImm(Is8Bit ? 6 : 3) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + } else { + // TBB [base, idx] = + // LDRB idx, [base, idx] ; or LDRH if TBH + // LSLS idx, #1 + // ADDS pc, pc, idx + + unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr; + EmitToStreamer(*OutStreamer, MCInstBuilder(Opc) + .addReg(Idx) + .addReg(Base) + .addReg(Idx) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + } + + // Lower and emit the PC label here. We reuse the table format from TBB/TBH + // which requires a 4-byte PC adjustment due to the size of t2TBB. We *could* + // make a new table format that only requires a 2-byte adjustment, or we + // could more simply just emit the label 2 bytes earlier. We take the easy + // option. NOTE(review): presumably a Thumb ADD to pc reads pc as its own address + 4 -- confirm the label emitted next really belongs ahead of the LSLS.
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm())); + + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri) + .addReg(Idx) + .addReg(ARM::CPSR) + .addReg(Idx) + .addImm(1) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr) + .addReg(ARM::PC) + .addReg(ARM::PC) + .addReg(Idx) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + return; + } case ARM::tBR_JTr: case ARM::BR_JTr: { // Lower and emit the instruction itself, then the jump table following it. Index: lib/Target/ARM/ARMConstantIslandPass.cpp =================================================================== --- lib/Target/ARM/ARMConstantIslandPass.cpp +++ lib/Target/ARM/ARMConstantIslandPass.cpp @@ -58,6 +58,11 @@ CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30), cl::desc("The max number of iteration for converge")); +static cl::opt<bool> SynthesizeThumb1TBB( + "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true), + cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an " + "equivalent to the TBB/TBH instructions")); + namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions @@ -189,6 +194,7 @@ bool isThumb; bool isThumb1; bool isThumb2; + bool isPositionIndependentOrROPI; public: static char ID; ARMConstantIslands() : MachineFunctionPass(ID) {} @@ -319,6 +325,8 @@ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget()); TII = STI->getInstrInfo(); + isPositionIndependentOrROPI = + STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); AFI = MF->getInfo<ARMFunctionInfo>(); isThumb = AFI->isThumbFunction(); @@ -326,6 +334,7 @@ isThumb2 = AFI->isThumb2Function(); HasFarJump = false; + bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); // This pass invalidates liveness information when it splits basic blocks. 
MF->getRegInfo().invalidateLiveness(); @@ -337,7 +346,7 @@ // Try to reorder and otherwise adjust the block layout to make good use // of the TB[BH] instructions. bool MadeChange = false; - if (isThumb2 && AdjustJumpTableBlocks) { + if (GenerateTBB && AdjustJumpTableBlocks) { scanFunctionJumpTables(); MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. @@ -414,7 +423,7 @@ MadeChange |= optimizeThumb2Branches(); // Optimize jump tables using TBB / TBH. - if (isThumb2) + if (GenerateTBB) MadeChange |= optimizeThumb2JumpTables(); // After a while, this might be made debug-only, but it is not expensive. @@ -540,9 +549,11 @@ case ARM::t2BR_JT: JTOpcode = ARM::JUMPTABLE_INSTS; break; + case ARM::tTBB_JT: case ARM::t2TBB_JT: JTOpcode = ARM::JUMPTABLE_TBB; break; + case ARM::tTBH_JT: case ARM::t2TBH_JT: JTOpcode = ARM::JUMPTABLE_TBH; break; @@ -638,7 +649,8 @@ void ARMConstantIslands::scanFunctionJumpTables() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &I : MBB) - if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT) + if (I.isBranch() && + (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr)) T2JumpTables.push_back(&I); } } @@ -679,6 +691,7 @@ default: continue; // Ignore other JT branches case ARM::t2BR_JT: + case ARM::tBR_JTr: T2JumpTables.push_back(&I); continue; // Does not get an entry in ImmBranches case ARM::Bcc: @@ -1943,7 +1956,7 @@ if (RemovableAdd) { RemovableAdd->eraseFromParent(); - DeadSize += 4; + DeadSize += isThumb2 ? 4 : 2; } else if (BaseReg == EntryReg) { // The add wasn't removable, but clobbered the base for the TBB. So we can't // preserve it. @@ -2010,25 +2023,80 @@ if (!ByteOk && !HalfWordOk) continue; + CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; MachineBasicBlock *MBB = MI->getParent(); if (!MI->getOperand(0).isKill()) // FIXME: needed now? 
continue; - unsigned IdxReg = MI->getOperand(1).getReg(); - bool IdxRegKill = MI->getOperand(1).isKill(); - CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; unsigned DeadSize = 0; bool CanDeleteLEA = false; bool BaseRegKill = false; - bool PreservedBaseReg = + + unsigned IdxReg = ~0U; + bool IdxRegKill = true; + if (isThumb2) { + IdxReg = MI->getOperand(1).getReg(); + IdxRegKill = MI->getOperand(1).isKill(); + + bool PreservedBaseReg = preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); + if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) + continue; + } else { + // We're in thumb-1 mode, so we must have something like: + // %idx = tLSLri %idx, 2 + // %base = tLEApcrelJT + // %t = tLDRr %idx, %base (getPrevNode/getNextNode yield null at a block edge, hence the null checks below) + unsigned BaseReg = User.MI->getOperand(0).getReg(); + + MachineInstr *Shift = User.MI->getPrevNode(); + if (!Shift || Shift->getOpcode() != ARM::tLSLri || + Shift->getOperand(3).getImm() != 2 || + !Shift->getOperand(2).isKill()) + continue; + IdxReg = Shift->getOperand(2).getReg(); + unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); - if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) - continue; + MachineInstr *Load = User.MI->getNextNode(); + if (!Load || Load->getOpcode() != ARM::tLDRr) + continue; + if (Load->getOperand(1).getReg() != ShiftedIdxReg || + Load->getOperand(2).getReg() != BaseReg || + !Load->getOperand(1).isKill()) + continue; + // If we're in PIC mode, there should be another ADD following. + if (isPositionIndependentOrROPI) { + MachineInstr *Add = Load->getNextNode(); + if (!Add || Add->getOpcode() != ARM::tADDrr || + Add->getOperand(2).getReg() != Load->getOperand(0).getReg() || + Add->getOperand(3).getReg() != BaseReg || + !Add->getOperand(2).isKill()) + continue; + if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg()) + continue; + + Add->eraseFromParent(); + DeadSize += 2; + } else { + if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg()) + continue; + } + + + // Now safe to delete the load and lsl. 
The LEA will be removed later. + CanDeleteLEA = true; + Shift->eraseFromParent(); + Load->eraseFromParent(); + DeadSize += 4; + } + DEBUG(dbgs() << "Shrink JT: " << *MI); MachineInstr *CPEMI = User.CPEMI; unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + if (!isThumb2) + Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT; + MachineBasicBlock::iterator MI_JT = MI; MachineInstr *NewJTMI = BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) @@ -2048,7 +2116,7 @@ if (CanDeleteLEA) { User.MI->eraseFromParent(); - DeadSize += 4; + DeadSize += isThumb2 ? 4 : 2; // The LEA was eliminated, the TBB instruction becomes the only new user // of the jump table. @@ -2164,9 +2232,16 @@ // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. - assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); - BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB) - .addImm(ARMCC::AL).addReg(0); + if (isThumb2) + BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)) + .addMBB(BB) + .addImm(ARMCC::AL) + .addReg(0); + else + BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB)) + .addMBB(BB) + .addImm(ARMCC::AL) + .addReg(0); // Update internal data structures to account for the newly inserted MBB. MF->RenumberBlocks(NewBB); Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -1308,6 +1308,18 @@ (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>, Sched<[WriteALU]>; +// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them +// and make use of the same compressed jump table format as Thumb-2. 
+let Size = 2 in { +def tTBB_JT : tPseudoInst<(outs), + (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, + Sched<[WriteBr]>; + +def tTBH_JT : tPseudoInst<(outs), + (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, + Sched<[WriteBr]>; +} + //===----------------------------------------------------------------------===// // TLS Instructions // Index: test/CodeGen/ARM/arm-position-independence-jump-table.ll =================================================================== --- test/CodeGen/ARM/arm-position-independence-jump-table.ll +++ test/CodeGen/ARM/arm-position-independence-jump-table.ll @@ -8,9 +8,9 @@ ; RUN: llc -relocation-model=ropi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2 ; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2 -; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_ABS -; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC -; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 --check-prefix=THUMB1_PC +; RUN: llc -relocation-model=static -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 +; RUN: llc -relocation-model=ropi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 +; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1 declare void @exit0() @@ 
-85,30 +85,22 @@ ; THUMB2: [[LBB4]] ; THUMB2-NEXT: b exit4 -; THUMB1: lsls r[[R_TAB_INDEX:[0-9]+]], r{{[0-9]+}}, #2 -; THUMB1: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]] -; THUMB1: ldr r[[R_BB_ADDR:[0-9]+]], [r[[R_TAB_INDEX]], r[[R_TAB_BASE]]] -; THUMB1_PC: adds r[[R_BB_ADDR]], r[[R_BB_ADDR]], r[[R_TAB_BASE]] -; THUMB1: mov pc, r[[R_BB_ADDR]] -; THUMB1: [[LJTI]] -; THUMB1_ABS: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]+1 -; THUMB1_ABS: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]+1 -; THUMB1_ABS: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]+1 -; THUMB1_ABS: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]+1 -; THUMB1_PC: .long [[LBB1:\.LBB[0-9]+_[0-9]+]]-[[LJTI]] -; THUMB1_PC: .long [[LBB2:\.LBB[0-9]+_[0-9]+]]-[[LJTI]] -; THUMB1_PC: .long [[LBB3:\.LBB[0-9]+_[0-9]+]]-[[LJTI]] -; THUMB1_PC: .long [[LBB4:\.LBB[0-9]+_[0-9]+]]-[[LJTI]] + +; THUMB1: add r[[x:[0-9]+]], pc +; THUMB1: ldrb r[[x]], [r[[x]], #6] +; THUMB1: [[LCPI:\.LCPI[0-9]+_[0-9]+]]: +; THUMB1: lsls r[[x]], r[[x]], #1 +; THUMB1: add pc, r[[x]] +; THUMB1: .byte ([[LBB1:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2 +; THUMB1: .byte ([[LBB2:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2 +; THUMB1: .byte ([[LBB3:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2 +; THUMB1: .byte ([[LBB4:\.LBB[0-9]+_[0-9]+]]-([[LCPI]]+4))/2 ; THUMB1: [[LBB1]] ; THUMB1-NEXT: bl exit1 -; THUMB1-NEXT: pop ; THUMB1: [[LBB2]] ; THUMB1-NEXT: bl exit2 -; THUMB1-NEXT: pop ; THUMB1: [[LBB3]] ; THUMB1-NEXT: bl exit3 -; THUMB1-NEXT: pop ; THUMB1: [[LBB4]] ; THUMB1-NEXT: bl exit4 -; THUMB1-NEXT: pop } Index: test/CodeGen/ARM/jump-table-tbh.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/jump-table-tbh.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=thumbv7m-linux-gnu -o - %s | FileCheck %s --check-prefix=T2 +; RUN: llc -mtriple=thumbv6m-linux-gnu -o - %s | FileCheck %s --check-prefix=T1 + +declare void @foo(double) +declare i32 @llvm.arm.space(i32, i32) + +define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) { + br label %complex + +; T2-LABEL: test_tbh: +; 
T2: [[ANCHOR:.LCPI[0-9_]+]]: +; T2: tbh [pc, r{{[0-9]+}}, lsl #1] +; T2-NEXT: @ BB#1 +; T2-NEXT: LJTI +; T2-NEXT: .short (.LBB0_4-([[ANCHOR]]+4))/2 +; T2-NEXT: .short (.LBB0_2-([[ANCHOR]]+4))/2 +; T2-NEXT: .short (.LBB0_3-([[ANCHOR]]+4))/2 +; T2-NEXT: .short (.LBB0_4-([[ANCHOR]]+4))/2 + +; T1-LABEL: test_tbh: +; T1: lsls [[x:r[0-9]+]], r4, #1 +; T1: add [[x]], pc +; T1: ldrh [[x]], {{\[}}[[x]], #6] +; T1: [[ANCHOR:.LCPI[0-9_]+]]: +; T1: lsls [[x]], [[x]], #1 +; T1: add pc, [[x]] +; T1-NEXT: @ BB#2 +; T1-NEXT: LJTI +; T1-NEXT: .short (.LBB0_7-([[ANCHOR]]+4))/2 +; T1-NEXT: .short (.LBB0_4-([[ANCHOR]]+4))/2 +; T1-NEXT: .short (.LBB0_6-([[ANCHOR]]+4))/2 +; T1-NEXT: .short (.LBB0_7-([[ANCHOR]]+4))/2 + +complex: + call void @foo(double 12345.0) + switch i32 %sw, label %second [ i32 0, label %other + i32 1, label %third + i32 2, label %end + i32 3, label %other ] + +second: + ret i32 43 +third: + call i32 @llvm.arm.space(i32 970, i32 undef) + ret i32 0 + +other: + call void @bar() + unreachable + +end: + ret i32 42 +} + +declare void @bar() Index: test/CodeGen/Thumb2/thumb2-jtb.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-jtb.ll +++ test/CodeGen/Thumb2/thumb2-jtb.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6-eabi -mcpu=cortex-m0 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s ; Do not use tbb / tbh if any destination is before the jumptable. 
; rdar://7102917 Index: test/CodeGen/Thumb2/thumb2-tbb.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-tbb.ll +++ test/CodeGen/Thumb2/thumb2-tbb.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=THUMB1 +; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=THUMB1 define void @bar(i32 %n.u) { entry: @@ -9,6 +11,13 @@ ; CHECK: .end_data_region ; CHECK-NEXT: .p2align 1 +; THUMB1-LABEL: bar: +; THUMB1: add pc, r0 +; THUMB1: .data_region jt8 +; THUMB1: .byte (LBB0_3-(LCPI0_0+4))/2 +; THUMB1: .end_data_region +; THUMB1-NEXT: .p2align 1 + switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] bb: tail call void(...) @foo1() Index: test/CodeGen/Thumb2/thumb2-tbh.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-tbh.ll +++ test/CodeGen/Thumb2/thumb2-tbh.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=CHECK --check-prefix=T2 +; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=CHECK --check-prefix=T1 +; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=CHECK --check-prefix=T1 ; Thumb2 target should reorder the bb's in order to use tbb / tbh. 
@@ -21,7 +23,9 @@ ; CHECK-LABEL: main: ; CHECK-NOT: adr {{r[0-9]+}}, LJTI ; CHECK: [[PCREL_ANCHOR:LCPI[0-9]+_[0-9]+]]: -; CHECK-NEXT: tbb [pc, {{r[0-9]+}}] +; T2-NEXT: tbb [pc, {{r[0-9]+}}] +; T1-NEXT: lsls r[[x:[0-9]+]], {{r[0-9]+}}, #1 +; T1-NEXT: add pc, r[[x]] ; CHECK: LJTI0_0: ; CHECK-NEXT: .data_region jt8