Index: llvm/lib/Target/AArch64/AArch64.h =================================================================== --- llvm/lib/Target/AArch64/AArch64.h +++ llvm/lib/Target/AArch64/AArch64.h @@ -31,6 +31,7 @@ FunctionPass *createAArch64DeadRegisterDefinitions(); FunctionPass *createAArch64RedundantCopyEliminationPass(); +FunctionPass *createAArch64CompressJumpTablesPass(); FunctionPass *createAArch64ConditionalCompares(); FunctionPass *createAArch64AdvSIMDScalar(); FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, @@ -57,6 +58,7 @@ void initializeAArch64AddressTypePromotionPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); +void initializeAArch64CompressJumpTablesPass(PassRegistry&); void initializeAArch64ConditionalComparesPass(PassRegistry&); void initializeAArch64ConditionOptimizerPass(PassRegistry&); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -67,6 +68,11 @@ return MCInstLowering.lowerOperand(MO, MCOp); } + void EmitJumpTableInfo() override; + void emitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, unsigned JTI); + + void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI); void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, @@ -417,6 +423,104 @@ printOperand(MI, NOps - 2, OS); } +void 
AArch64AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI) return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(*MF->getFunction(), TM);
+  OutStreamer->SwitchSection(ReadOnlySec);
+
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock *> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    unsigned Size = AFI->getJumpTableEntrySize(JTI);
+    EmitAlignment(Log2_32(Size));
+    OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+    for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+      emitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+  }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                           const MachineBasicBlock *MBB,
+                                           unsigned JTI) {
+  const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+  if (Size == 4) {
+    // .word LBB - LJTI
+    const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+    const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+  } else {
+    // .byte (LBB - LJTPC)/4 (or .hword)
+    const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+    const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+    Value = MCBinaryExpr::createDiv(
+        Value, MCConstantExpr::create(4, OutContext), OutContext);
+  }
+
+  OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain a signed byte or half, representing the offset
+/// from the start of this instruction to the desired basic block. Since all
+/// instructions are 4-byte aligned, this is further compressed by counting in
+/// instructions rather than bytes (i.e. divided by 4). So, to materialize the
+/// correct destination we need:
+///
+/// .LJTPC0_0:
+///  adr xDest, .LJTPC0_0
+///  ldrsb xScratch, [xTable, xEntry] (with "lsl #1" for ldrsh).
+///  add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+                                                const llvm::MachineInstr &MI) {
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned TableReg = MI.getOperand(2).getReg();
+  unsigned EntryReg = MI.getOperand(3).getReg();
+  int JTIdx = MI.getOperand(4).getIndex();
+  bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+  // This has to be first because the compression pass bases its reachability
+  // calculations on the start of the JumpTableDest instruction.
+  auto Label =
+      MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+  OutStreamer.EmitLabel(Label);
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+                                  .addReg(DestReg)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      Label, MF->getContext())));
+
+  // Load the number of instruction-steps to offset from the label.
+  unsigned LdrOpcode = IsByteEntry ? AArch64::LDRSBXroX : AArch64::LDRSHXroX;
+  EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+                                  .addReg(ScratchReg)
+                                  .addReg(TableReg)
+                                  .addReg(EntryReg)
+                                  .addImm(0)
+                                  .addImm(IsByteEntry ? 0 : 1));
+
+  // Multiply the steps by 4 and add to the already materialized base label
+  // address.
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs) + .addReg(DestReg) + .addReg(DestReg) + .addReg(ScratchReg) + .addImm(2)); +} + void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -624,6 +728,32 @@ return; } + case AArch64::JumpTableDest32: { + // We want: + // ldrsw xScratch, [xTable, xEntry, lsl #2] + // add xDest, xTable, xScratch + unsigned DestReg = MI->getOperand(0).getReg(), + ScratchReg = MI->getOperand(1).getReg(), + TableReg = MI->getOperand(2).getReg(), + EntryReg = MI->getOperand(3).getReg(); + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX) + .addReg(ScratchReg) + .addReg(TableReg) + .addReg(EntryReg) + .addImm(0) + .addImm(1)); + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs) + .addReg(DestReg) + .addReg(TableReg) + .addReg(ScratchReg) + .addImm(0)); + return; + } + case AArch64::JumpTableDest16: + case AArch64::JumpTableDest8: + LowerJumpTableDestSmall(*OutStreamer, *MI); + return; + case AArch64::FMOVS0: case AArch64::FMOVD0: EmitFMov0(*MI); Index: llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp @@ -0,0 +1,153 @@ +//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// This pass looks at the basic blocks each jump-table refers to and works out +// whether they can be emitted in a compressed form (with 8 or 16-bit +// entries). If so, it changes the opcode and flags them in the associated +// AArch64FunctionInfo. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineFunction *MF;
+  SmallVector<int, 16> BlockInfo;
+
+  int computeBlockSize(MachineBasicBlock &MBB);
+  void scanFunction();
+
+  MCSymbol *getJumpTablePICSymbol(int JTIdx);
+
+  bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+  static char ID;
+  AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+    initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+  StringRef getPassName() const override {
+    return "AArch64 Compress Jump Tables";
+  }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+                "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+  int Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->getInstSizeInBytes(MI);
+  return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  int Offset = 0;
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo[MBB.getNumber()] = Offset;
+    Offset += computeBlockSize(MBB);
+  }
+}
+
+MCSymbol *AArch64CompressJumpTables::getJumpTablePICSymbol(int JTIdx) {
+  return MF->getContext().getOrCreateSymbol(
+      Twine(MF->getDataLayout().getPrivateGlobalPrefix()) + "JTPC" +
+      Twine(MF->getFunctionNumber()) + "_" + Twine(JTIdx));
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+                                                 int Offset) {
+  if (MI.getOpcode() != AArch64::JumpTableDest32)
+    return false;
+
+  int JTIdx = MI.getOperand(4).getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+  // The jump-table might have been optimized away.
+  if (JT.MBBs.empty())
+    return false;
+
+  int MaxOffset = 0, MinOffset = 0;
+  for (auto Block : JT.MBBs) {
+    int BlockOffset = BlockInfo[Block->getNumber()] - Offset;
+    assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+    MaxOffset = std::max(MaxOffset, BlockOffset);
+    MinOffset = std::min(MinOffset, BlockOffset);
+  }
+
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  if (isInt<8>(MaxOffset / 4) && isInt<8>(MinOffset / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 1, getJumpTablePICSymbol(JTIdx));
+    MI.setDesc(TII->get(AArch64::JumpTableDest8));
+    ++NumJT8;
+    return true;
+  } else if (isInt<16>(MaxOffset / 4) && isInt<16>(MinOffset / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 2, getJumpTablePICSymbol(JTIdx));
+    MI.setDesc(TII->get(AArch64::JumpTableDest16));
+    ++NumJT16;
+    return true;
+  }
+
+  ++NumJT32;
+  return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+  bool Changed = false;
+  MF = &MFIn;
+
+  const TargetSubtargetInfo &ST = MF->getSubtarget();
+  TII = ST.getInstrInfo();
+
+  scanFunction();
+
+  for (MachineBasicBlock &MBB : *MF) {
+    int Offset = BlockInfo[MBB.getNumber()];
+    for (MachineInstr &MI : MBB) {
+      Changed |= compressJumpTable(MI, Offset);
+      Offset += TII->getInstSizeInBytes(MI);
+    }
+  }
+
+  return Changed;
+} + +FunctionPass *llvm::createAArch64CompressJumpTablesPass() { + return new AArch64CompressJumpTables(); +} Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -537,6 +537,7 @@ SDValue TVal, SDValue FVal, const SDLoc &dl, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -182,7 +182,7 @@ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); @@ -2519,6 +2519,8 @@ return LowerSELECT_CC(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::BR_JT: + return LowerBR_JT(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: @@ -4428,6 +4430,22 @@ return getAddr(JT, DAG); } +SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, + SelectionDAG &DAG) const { + // Jump table entries as PC relative offsets. No additional tweaking + // is necessary here. Just get the address of the jump table. 
+  SDLoc DL(Op);
+  SDValue JT = Op.getOperand(1);
+  SDValue Entry = Op.getOperand(2);
+  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+  SDNode *Dest =
+      DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+                         Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+                     SDValue(Dest, 0));
+}
+
 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                  SelectionDAG &DAG) const {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -110,6 +110,11 @@
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
     break;
+  case AArch64::JumpTableDest32:
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    NumBytes = 12;
+    break;
   }
 
   return NumBytes;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -389,6 +389,21 @@
 def : Pat<(AArch64LOADgot tconstpool:$addr),
           (LOADgot tconstpool:$addr)>;
+
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                     Sched<[]>;
+}
+
 //===----------------------------------------------------------------------===//
 // System instructions.
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -145,6 +145,19 @@
   unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
   void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
 
+  unsigned getJumpTableEntrySize(int Idx) const {
+    auto It = JumpTableEntryInfo.find(Idx);
+    if (It != JumpTableEntryInfo.end())
+      return It->second.first;
+    return 4;
+  }
+  MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+    return JumpTableEntryInfo.find(Idx)->second.second;
+  }
+  void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+    JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+  }
+
   typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
 
   const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -183,6 +196,8 @@
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
   SetOfInstructions LOHRelated;
+
+  DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
 };
 
 } // end namespace llvm
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -75,6 +75,8 @@
     cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
     cl::init(true), cl::Hidden);
 
+
+
 static cl::opt<bool>
     EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
                                   cl::desc("Enable the pass that removes dead"
@@ -128,6 +130,10 @@
     BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                      cl::desc("Relax out of range conditional branches"));
 
+static cl::opt<bool> EnableCompressJumpTables(
+    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+    cl::desc("Use smallest entry possible for jump tables"));
+
 // FIXME: Unify control over GlobalMerge.
 static cl::opt<bool>
     EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -155,6 +161,7 @@
   initializeAArch64AddressTypePromotionPass(*PR);
   initializeAArch64AdvSIMDScalarPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
+  initializeAArch64CompressJumpTablesPass(*PR);
   initializeAArch64ConditionalComparesPass(*PR);
   initializeAArch64ConditionOptimizerPass(*PR);
   initializeAArch64DeadRegisterDefinitionsPass(*PR);
@@ -533,6 +540,9 @@
   if (BranchRelaxation)
     addPass(&BranchRelaxationPassID);
 
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+    addPass(createAArch64CompressJumpTablesPass());
+
   if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
       TM->getTargetTriple().isOSBinFormatMachO())
     addPass(createAArch64CollectLOHPass());
Index: llvm/lib/Target/AArch64/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AArch64/CMakeLists.txt
+++ llvm/lib/Target/AArch64/CMakeLists.txt
@@ -50,6 +50,7 @@
   AArch64FastISel.cpp
AArch64A53Fix835769.cpp AArch64FrameLowering.cpp + AArch64CompressJumpTables.cpp AArch64ConditionOptimizer.cpp AArch64RedundantCopyElimination.cpp AArch64ISelDAGToDAG.cpp Index: llvm/test/CodeGen/AArch64/jump-table-compress.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/jump-table-compress.mir @@ -0,0 +1,117 @@ +# RUN: llc -mtriple=aarch64-linux-gnu %s -run-pass=aarch64-jump-tables -o - | FileCheck %s +--- | + define i32 @test_jumptable(i32 %in) { + unreachable + } + +... +--- +name: test_jumptable +alignment: 2 +exposesReturnsTwice: false +noVRegs: true +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%w0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.3' ] + - id: 1 + blocks: [ '%bb.5' ] + - id: 2 + blocks: [ '%bb.5' ] + - id: 3 + blocks: [ '%bb.7' ] +body: | + bb.0 (%ir-block.0): + + bb.1 (%ir-block.0): + ; CHECK: body: + ; CHECK-LABEL: bb.1 + ; CHECK: JumpTableDest8 + liveins: %x8 + early-clobber %x10, dead early-clobber %x11 = JumpTableDest32 undef killed %x9, undef killed %x8, %jump-table.0 + BR killed %x10 + + bb.2: + ; Destination is 4 * (3 + 1 + 7 * 16 + 11) = 4 * 127 bytes after JumpTableDest. Byte is OK. 
+ INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11", 0 + + bb.3: + ; CHECK-LABEL: bb.3 + ; CHECK: JumpTableDest16 + early-clobber %x10, dead early-clobber %x11 = JumpTableDest32 undef killed %x9, undef killed %x8, %jump-table.1 + BR killed %x10 + + bb.4: + ; Destination is 4 * (3 + 1 + 7 * 16 + 12) = 4 * 128 bytes after JumpTableDest. Need 16-bits + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12", 0 + + bb.5: + ; Destination is 4 * (8 * 16) = 4 * 128 bytes before JumpTableDest. Byte is OK. 
+ INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + + bb.6: + ; CHECK-LABEL: bb.6 + ; CHECK: JumpTableDest8 + early-clobber %x10, dead early-clobber %x11 = JumpTableDest32 undef killed %x9, undef killed %x8, %jump-table.2 + BR killed %x10 + + bb.7: + ; Destination is 4 * (8 * 16 + 1) = 4 * 129 bytes before JumpTableDest. Need 16-bits. + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", 0 + INLINEASM $"1", 0 + + bb.8: + ; CHECK-LABEL: bb.8 + ; CHECK: JumpTableDest16 + early-clobber %x10, dead early-clobber %x11 = JumpTableDest32 undef killed %x9, undef killed %x8, %jump-table.3 + BR killed %x10 +... 
Index: llvm/test/CodeGen/AArch64/jump-table.ll =================================================================== --- llvm/test/CodeGen/AArch64/jump-table.ll +++ llvm/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,8 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -no-integrated-as | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s -no-integrated-as| FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -no-integrated-as | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 %s -o - -no-integrated-as | FileCheck %s --check-prefix=CHECK-IOS +; RUN: llc -verify-machineinstrs -o - %s -aarch64-enable-compress-jump-tables=0 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -no-integrated-as | FileCheck %s --check-prefix=CHECK-32 define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable @@ -11,23 +13,38 @@ i32 2, label %lbl3 i32 4, label %lbl4 ] -; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 -; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0 -; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3] -; CHECK: br [[DEST]] - -; CHECK-LARGE: movz x[[JTADDR:[0-9]+]], #:abs_g0_nc:.LJTI0_0 -; CHECK-LARGE: movk x[[JTADDR]], #:abs_g1_nc:.LJTI0_0 -; CHECK-LARGE: movk x[[JTADDR]], #:abs_g2_nc:.LJTI0_0 
-; CHECK-LARGE: movk x[[JTADDR]], #:abs_g3:.LJTI0_0 -; CHECK-LARGE: ldr [[DEST:x[0-9]+]], [x[[JTADDR]], {{x[0-9]+}}, lsl #3] -; CHECK-LARGE: br [[DEST]] - -; CHECK-PIC: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 -; CHECK-PIC: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0 -; CHECK-PIC: ldrsw [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2] -; CHECK-PIC: add [[TABLE:x[0-9]+]], [[DEST]], x[[JT]] -; CHECK-PIC: br [[TABLE]] +; CHECK-LABEL: test_jumptable: +; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0 +; CHECK: [[JTPC:.LJTPC[0-9]+_[0-9]+]]: +; CHECK: adr [[PCBASE:x[0-9]+]], [[JTPC]] +; CHECK: ldrsb [[OFFSET:x[0-9]+]], [x[[JT]], {{x[0-9]+}}] +; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], [[OFFSET]], lsl #2 +; CHECK: br [[DEST]] + +; CHECK-LARGE: movz x[[JTADDR:[0-9]+]], #:abs_g0_nc:.LJTI0_0 +; CHECK-LARGE: movk x[[JTADDR]], #:abs_g1_nc:.LJTI0_0 +; CHECK-LARGE: movk x[[JTADDR]], #:abs_g2_nc:.LJTI0_0 +; CHECK-LARGE: movk x[[JTADDR]], #:abs_g3:.LJTI0_0 +; CHECK-LARGE: [[JTPC:.LJTPC[0-9]+_[0-9]+]]: +; CHECK-LARGE: adr [[PCBASE:x[0-9]+]], [[JTPC]] +; CHECK-LARGE: ldrsb [[OFFSET:x[0-9]+]], [x[[JTADDR]], {{x[0-9]+}}] +; CHECK-LARGE: add [[DEST:x[0-9]+]], [[PCBASE]], [[OFFSET]], lsl #2 +; CHECK-LARGE: br [[DEST]] + +; CHECK-IOS: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE +; CHECK-IOS: add x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF +; CHECK-IOS: [[JTPC:LJTPC[0-9]+_[0-9]+]]: +; CHECK-IOS: adr [[PCBASE:x[0-9]+]], [[JTPC]] +; CHECK-IOS: ldrsb [[OFFSET:x[0-9]+]], [x[[JT]], {{x[0-9]+}}] +; CHECK-IOS: add [[DEST:x[0-9]+]], [[PCBASE]], [[OFFSET]], lsl #2 +; CHECK-IOS: br [[DEST]] + +; CHECK-32: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK-32: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0 +; CHECK-32: ldrsw [[OFFSET:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2] +; CHECK-32: add [[DEST:x[0-9]+]], x[[JT]], [[OFFSET]] +; CHECK-32: br [[DEST]] def: ret i32 0 @@ -49,18 +66,85 @@ ; CHECK: .rodata ; CHECK: .LJTI0_0: -; CHECK-NEXT: .xword -; 
CHECK-NEXT: .xword -; CHECK-NEXT: .xword -; CHECK-NEXT: .xword -; CHECK-NEXT: .xword +; CHECK-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 + +define i32 @test_jumptable16(i32 %in) { + + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] +; CHECK-LABEL: test_jumptable16: +; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI1_0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI1_0 +; CHECK: [[JTPC:.LJTPC[0-9]+_[0-9]+]]: +; CHECK: adr [[PCBASE:x[0-9]+]], [[JTPC]] +; CHECK: ldrsh [[OFFSET:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #1] +; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], [[OFFSET]], lsl #2 +; CHECK: br [[DEST]] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + call void asm sideeffect "1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16", ""() + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + ret i32 8 + +} + +; 
CHECK: .rodata +; CHECK: .p2align 1 +; CHECK: .LJTI1_0: +; CHECK-NEXT: .hword (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .hword (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .hword (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .hword (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-NEXT: .hword (.LBB{{.*}}-[[JTPC]])/4 ; CHECK-PIC-NOT: .data_region ; CHECK-PIC-NOT: .LJTI0_0 ; CHECK-PIC: .LJTI0_0: -; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0 -; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0 -; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0 -; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0 -; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0 +; CHECK-PIC-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-PIC-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-PIC-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-PIC-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 +; CHECK-PIC-NEXT: .byte (.LBB{{.*}}-[[JTPC]])/4 ; CHECK-PIC-NOT: .end_data_region + +; CHECK-IOS: .section __TEXT,__const +; CHECK-IOS-NOT: .data_region +; CHECK-IOS: LJTI0_0: +; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTPC]])/4 +; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTPC]])/4 +; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTPC]])/4 +; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTPC]])/4 +; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTPC]])/4 +; CHECK-IOS-NOT: .end_data_region Index: llvm/test/CodeGen/AArch64/min-jump-table.ll =================================================================== --- llvm/test/CodeGen/AArch64/min-jump-table.ll +++ llvm/test/CodeGen/AArch64/min-jump-table.ll @@ -11,11 +11,11 @@ i32 2, label %bb2 ] ; CHECK-LABEL: function jt2: -; CHECK0-NEXT: Jump Tables: +; CHECK0-NEXT: {{^}}Jump Tables: ; CHECK0-NEXT: jt#0: ; CHECK0-NOT: jt#1: -; CHECK4-NOT: Jump Tables: -; CHECK8-NOT: Jump Tables: +; CHECK4-NOT: {{^}}Jump Tables: +; CHECK8-NOT: {{^}}Jump Tables: bb1: tail call void @ext(i32 0) br label %return bb2: tail call void @ext(i32 2) br label %return @@ -32,13 +32,13 @@ i32 4, label %bb4 ] ; CHECK-LABEL: function jt4: -; CHECK0-NEXT: Jump Tables: +; CHECK0-NEXT: {{^}}Jump Tables: ; CHECK0-NEXT: jt#0: 
; CHECK0-NOT: jt#1: -; CHECK4-NEXT: Jump Tables: +; CHECK4-NEXT: {{^}}Jump Tables: ; CHECK4-NEXT: jt#0: ; CHECK4-NOT: jt#1: -; CHECK8-NOT: Jump Tables: +; CHECK8-NOT: {{^}}Jump Tables: bb1: tail call void @ext(i32 0) br label %return bb2: tail call void @ext(i32 2) br label %return @@ -61,7 +61,7 @@ i32 8, label %bb8 ] ; CHECK-LABEL: function jt8: -; CHECK-NEXT: Jump Tables: +; CHECK-NEXT: {{^}}Jump Tables: ; CHECK-NEXT: jt#0: ; CHECK-NOT: jt#1: