diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1570,6 +1570,17 @@
   VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
   /// @}
 
+  /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
+  /// state.
+  ///
+  /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
+  /// node containing a jump table in a format suitable for the target, so it
+  /// needs to know what format of jump table it can legally use.
+  ///
+  /// For non-Arm targets, this function isn't used. It defaults to returning
+  /// false, but it shouldn't matter what it returns anyway.
+  bool hasArmWideBranch(bool Thumb) const;
+
   /// @}
 
 private:
@@ -1927,6 +1938,7 @@
                                      Align Alignment) const = 0;
   virtual VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
+  virtual bool hasArmWideBranch(bool Thumb) const = 0;
 };
 
 template <typename T>
@@ -2606,6 +2618,10 @@
   getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
     return Impl.getVPLegalizationStrategy(PI);
   }
+
+  bool hasArmWideBranch(bool Thumb) const override {
+    return Impl.hasArmWideBranch(Thumb);
+  }
 };
 
 template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -862,6 +862,8 @@
         /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
   }
 
+  bool hasArmWideBranch(bool) const { return false; }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1170,6 +1170,10 @@
   return TTIImpl->getVPLegalizationStrategy(VPI);
 }
 
+bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
+  return TTIImpl->hasArmWideBranch(Thumb);
+}
+
 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -325,6 +325,9 @@
     return true;
   }
+
+  bool hasArmWideBranch(bool Thumb) const;
+
   /// @}
 };
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2441,3 +2441,16 @@
   }
   return -1;
 }
+
+bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
+  if (Thumb) {
+    // B.W is available in any Thumb2-supporting target, and also in every
+    // version of Armv8-M, even Baseline which does not include the rest of
+    // Thumb2.
+    return ST->isThumb2() || ST->hasV8MBaselineOps();
+  } else {
+    // B is available in all versions of the Arm ISA, so the only question is
+    // whether that ISA is available at all.
+    return ST->hasARMOps();
+  }
+}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
@@ -406,6 +407,15 @@
   Triple::OSType OS;
   Triple::ObjectFormatType ObjectFormat;
 
+  // Determines which kind of Thumb jump table we generate. If arch is
+  // either 'arm' or 'thumb' we need to find this out, because
+  // selectJumpTableArmEncoding may decide to use Thumb in either case.
+  bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
+
+  // The jump table type we ended up deciding on. (Usually the same as
+  // Arch, except that 'arm' and 'thumb' are often interchangeable.)
+  Triple::ArchType JumpTableArch = Triple::UnknownArch;
+
   IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
   IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
   PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
@@ -481,6 +491,8 @@
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                        ArrayRef<GlobalTypeMember *> Globals);
+  Triple::ArchType
+  selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
   unsigned getJumpTableEntrySize();
   Type *getJumpTableEntryType();
   void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
@@ -518,7 +530,8 @@
   void replaceDirectCalls(Value *Old, Value *New);
 
 public:
-  LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+  LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
+                       ModuleSummaryIndex *ExportSummary,
                        const ModuleSummaryIndex *ImportSummary,
                        bool DropTypeTests);
 
@@ -526,7 +539,7 @@
   // Lower the module using the action and summary passed as command line
   // arguments. For testing purposes only.
-  static bool runForTesting(Module &M);
+  static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
 };
 
 } // end anonymous namespace
@@ -1182,31 +1195,36 @@
 static const unsigned kX86IBTJumpTableEntrySize = 16;
 static const unsigned kARMJumpTableEntrySize = 4;
 static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kARMv6MJumpTableEntrySize = 16;
 static const unsigned kRISCVJumpTableEntrySize = 8;
 
 unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
-  switch (Arch) {
-  case Triple::x86:
-  case Triple::x86_64:
-    if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
+  switch (JumpTableArch) {
+  case Triple::x86:
+  case Triple::x86_64:
+    if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
             M.getModuleFlag("cf-protection-branch")))
-      if (MD->getZExtValue())
-        return kX86IBTJumpTableEntrySize;
-    return kX86JumpTableEntrySize;
-  case Triple::arm:
-  case Triple::thumb:
+      if (MD->getZExtValue())
+        return kX86IBTJumpTableEntrySize;
+    return kX86JumpTableEntrySize;
+  case Triple::arm:
+    return kARMJumpTableEntrySize;
+  case Triple::thumb:
+    if (CanUseThumbBWJumpTable)
       return kARMJumpTableEntrySize;
-  case Triple::aarch64:
-    if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+    else
+      return kARMv6MJumpTableEntrySize;
+  case Triple::aarch64:
+    if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
             M.getModuleFlag("branch-target-enforcement")))
-      if (BTE->getZExtValue())
-        return kARMBTIJumpTableEntrySize;
-    return kARMJumpTableEntrySize;
-  case Triple::riscv32:
-  case Triple::riscv64:
-    return kRISCVJumpTableEntrySize;
-  default:
-    report_fatal_error("Unsupported architecture for jump tables");
+      if (BTE->getZExtValue())
+        return kARMBTIJumpTableEntrySize;
+    return kARMJumpTableEntrySize;
+  case Triple::riscv32:
+  case Triple::riscv64:
+    return kRISCVJumpTableEntrySize;
+  default:
+    report_fatal_error("Unsupported architecture for jump tables");
   }
 }
@@ -1240,7 +1258,32 @@
     AsmOS << "bti c\n";
     AsmOS << "b $" << ArgIndex << "\n";
   } else if (JumpTableArch == Triple::thumb) {
-    AsmOS << "b.w $" << ArgIndex << "\n";
+    if (!CanUseThumbBWJumpTable) {
+      // In Armv6-M, this sequence will generate a branch without corrupting
+      // any registers. We use two stack words; in the second, we construct the
+      // address we'll pop into pc, and the first is used to save and restore
+      // r0 which we use as a temporary register.
+      //
+      // To support position-independent use cases, the offset of the target
+      // function is stored as a relative offset (which will expand into an
+      // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
+      // object file types), and added to pc after we load it. (The alternative
+      // B.W is automatically pc-relative.)
+      //
+      // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
+      // sixth halfword of padding, and then the offset consumes a further 4
+      // bytes, for a total of 16, which is very convenient since entries in
+      // this jump table need to have power-of-two size.
+      AsmOS << "push {r0,r1}\n"
+            << "ldr r0, 1f\n"
+            << "0: add r0, r0, pc\n"
+            << "str r0, [sp, #4]\n"
+            << "pop {r0,pc}\n"
+            << ".balign 4\n"
+            << "1: .word $" << ArgIndex << " - (0b + 4)\n";
+    } else {
+      AsmOS << "b.w $" << ArgIndex << "\n";
+    }
   } else if (JumpTableArch == Triple::riscv32 ||
              JumpTableArch == Triple::riscv64) {
     AsmOS << "tail $" << ArgIndex << "@plt\n";
@@ -1352,12 +1395,19 @@
 // Each jump table must be either ARM or Thumb as a whole for the bit-test math
 // to work. Pick one that matches the majority of members to minimize interop
 // veneers inserted by the linker.
-static Triple::ArchType
-selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
-                           Triple::ArchType ModuleArch) {
-  if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
-    return ModuleArch;
+Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
+    ArrayRef<GlobalTypeMember *> Functions) {
+  if (Arch != Triple::arm && Arch != Triple::thumb)
+    return Arch;
+
+  if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
+    // In architectures that provide Arm and Thumb-1 but not Thumb-2,
+    // we should always prefer the Arm jump table format, because the
+    // Thumb-1 one is larger and slower.
+    return Triple::arm;
+  }
+
+  // Otherwise, go with majority vote.
   unsigned ArmCount = 0, ThumbCount = 0;
   for (const auto GTM : Functions) {
     if (!GTM->isJumpTableCanonical()) {
@@ -1368,7 +1418,7 @@
     }
 
     Function *F = cast<Function>(GTM->getGlobal());
-    ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
+    ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
   }
 
   return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
@@ -1381,8 +1431,6 @@
   SmallVector<Value *, 16> AsmArgs;
   AsmArgs.reserve(Functions.size() * 2);
 
-  Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
-
   for (GlobalTypeMember *GTM : Functions)
     createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
                          cast<Function>(GTM->getGlobal()));
@@ -1399,9 +1447,11 @@
     F->addFnAttr("target-features", "-thumb-mode");
   if (JumpTableArch == Triple::thumb) {
     F->addFnAttr("target-features", "+thumb-mode");
-    // Thumb jump table assembly needs Thumb2. The following attribute is added
-    // by Clang for -march=armv7.
-    F->addFnAttr("target-cpu", "cortex-a8");
+    if (CanUseThumbBWJumpTable) {
+      // Thumb jump table assembly needs Thumb2. The following attribute is
+      // added by Clang for -march=armv7.
+      F->addFnAttr("target-cpu", "cortex-a8");
+    }
   }
   // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
   // for the function to avoid double BTI. This is a no-op without
@@ -1521,6 +1571,10 @@
   // FIXME: find a better way to represent the jumptable in the IR.
   assert(!Functions.empty());
 
+  // Decide on the jump table encoding, so that we know how big the
+  // entries will be.
+  JumpTableArch = selectJumpTableArmEncoding(Functions);
+
   // Build a simple layout based on the regular layout of jump tables.
   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
   unsigned EntrySize = getJumpTableEntrySize();
@@ -1706,18 +1760,31 @@
 /// Lower all type tests in this module.
 LowerTypeTestsModule::LowerTypeTestsModule(
-    Module &M, ModuleSummaryIndex *ExportSummary,
+    Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
     const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
     : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
       DropTypeTests(DropTypeTests || ClDropTypeTests) {
   assert(!(ExportSummary && ImportSummary));
   Triple TargetTriple(M.getTargetTriple());
   Arch = TargetTriple.getArch();
+  if (Arch == Triple::arm)
+    CanUseArmJumpTable = true;
+  if (Arch == Triple::arm || Arch == Triple::thumb) {
+    auto &FAM =
+        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    for (Function &F : M) {
+      auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+      if (TTI.hasArmWideBranch(false))
+        CanUseArmJumpTable = true;
+      if (TTI.hasArmWideBranch(true))
+        CanUseThumbBWJumpTable = true;
+    }
+  }
   OS = TargetTriple.getOS();
   ObjectFormat = TargetTriple.getObjectFormat();
 }
 
-bool LowerTypeTestsModule::runForTesting(Module &M) {
+bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
   ModuleSummaryIndex Summary(/*HaveGVs=*/false);
 
   // Handle the command-line summary arguments. This code is for testing
@@ -1735,7 +1802,8 @@
   bool Changed =
       LowerTypeTestsModule(
-          M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+          M, AM,
+          ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
           ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
           /*DropTypeTests*/ false)
           .lower();
@@ -2298,10 +2366,10 @@
                                            ModuleAnalysisManager &AM) {
   bool Changed;
   if (UseCommandLine)
-    Changed = LowerTypeTestsModule::runForTesting(M);
+    Changed = LowerTypeTestsModule::runForTesting(M, AM);
   else
     Changed =
-        LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+        LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
             .lower();
   if (!Changed)
     return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll b/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
--- a/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
+++ b/llvm/test/Transforms/LowerTypeTests/function-arm-thumb.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64"
 
-define void @f1() "target-features"="+thumb-mode" !type !0 {
+define void @f1() "target-features"="+thumb-mode,+v6t2" !type !0 {
   ret void
 }
diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll
--- a/llvm/test/Transforms/LowerTypeTests/function.ll
+++ b/llvm/test/Transforms/LowerTypeTests/function.ll
@@ -3,7 +3,10 @@
 ; RUN: opt -S -passes=lowertypetests -mtriple=i686-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=x86_64-pc-win32 %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=arm-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s
-; RUN: opt -S -passes=lowertypetests -mtriple=thumb-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s
+; RUN: opt -S -passes=lowertypetests -mtriple=thumbv7m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s
+; RUN: opt -S -passes=lowertypetests -mtriple=thumbv8m.base-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMB,NATIVE %s
+; RUN: opt -S -passes=lowertypetests -mtriple=thumbv6m-unknown-linux-gnu %s | FileCheck --check-prefixes=THUMBV6M,NATIVE %s
+; RUN: opt -S -passes=lowertypetests -mtriple=thumbv5-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=ARM,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
 ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s
@@ -25,6 +28,7 @@
 ; X86: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; ARM: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1)
+; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1)
 ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1)
 
 ; NATIVE: define hidden void @f.cfi()
@@ -53,9 +57,10 @@
 
 ; X86-LINUX: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
 ; X86-WIN32: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
-; ARM: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
-; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
-; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 {
+; ARM:      define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+; THUMB:    define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 {
+; RISCV:    define private void @[[JT]]() #[[ATTR:.*]] align 8 {
 
 ; X86: jmp ${0:c}@plt
 ; X86-SAME: int3
@@ -72,6 +77,21 @@
 ; THUMB: b.w $0
 ; THUMB-SAME: b.w $1
 
+; THUMBV6M: push {r0,r1}
+; THUMBV6M-SAME: ldr r0, 1f
+; THUMBV6M-SAME: 0: add r0, r0, pc
+; THUMBV6M-SAME: str r0, [sp, #4]
+; THUMBV6M-SAME: pop {r0,pc}
+; THUMBV6M-SAME: .balign 4
+; THUMBV6M-SAME: 1: .word $0 - (0b + 4)
+; THUMBV6M-SAME: push {r0,r1}
+; THUMBV6M-SAME: ldr r0, 1f
+; THUMBV6M-SAME: 0: add r0, r0, pc
+; THUMBV6M-SAME: str r0, [sp, #4]
+; THUMBV6M-SAME: pop {r0,pc}
+; THUMBV6M-SAME: .balign 4
+; THUMBV6M-SAME: 1: .word $1 - (0b + 4)
+
 ; RISCV: tail $0@plt
 ; RISCV-SAME: tail $1@plt
 
@@ -81,6 +101,7 @@
 ; X86-WIN32: attributes #[[ATTR]] = { nocf_check nounwind }
 ; ARM: attributes #[[ATTR]] = { naked nounwind
 ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }
+; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" }
 ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" }
 
 ; WASM32: ![[I0]] = !{i64 1}
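
Illustration only, not part of the patch: the LowerTypeTestsModule constructor change above reaches the per-function TargetTransformInfo through the FunctionAnalysisManagerModuleProxy and then calls the new hasArmWideBranch() hook. The sketch below shows that same query pattern in a self-contained new-pass-manager module pass; the pass and its name JumpTableWidthQuery are hypothetical, while the analysis APIs and the hook itself come from LLVM and from this patch.

// Hypothetical example pass, assuming this patch is applied.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

namespace {

struct JumpTableWidthQuery : PassInfoMixin<JumpTableWidthQuery> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
    // TargetTransformInfo is a function-level analysis, so a module pass
    // reaches it through the FunctionAnalysisManagerModuleProxy.
    auto &FAM =
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

    bool CanUseArm = false, CanUseThumbBW = false;
    for (Function &F : M) {
      if (F.isDeclaration())
        continue;
      auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
      // hasArmWideBranch(false) asks about an Arm-state B with wide range;
      // hasArmWideBranch(true) asks about the Thumb-2 B.W encoding.
      CanUseArm |= TTI.hasArmWideBranch(/*Thumb=*/false);
      CanUseThumbBW |= TTI.hasArmWideBranch(/*Thumb=*/true);
    }

    // A real client would pick a jump-table encoding from these flags, as
    // selectJumpTableArmEncoding does; this sketch changes nothing.
    (void)CanUseArm;
    (void)CanUseThumbBW;
    return PreservedAnalyses::all();
  }
};

} // namespace

On an Armv6-M subtarget both queries return false (no Arm state, no Thumb-2 B.W), which is what drives LowerTypeTests to fall back to the 16-byte jump-table entry checked by the THUMBV6M lines above.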