Index: llvm/lib/Target/ARM/ARM.td =================================================================== --- llvm/lib/Target/ARM/ARM.td +++ llvm/lib/Target/ARM/ARM.td @@ -407,6 +407,11 @@ "equivalent when the immediate does " "not fit in the encoding.">; +def FeatureDontRestrictIT : SubtargetFeature<"unrestricted-it-blocks", + "DontRestrictIT", "true", + "Don't restrict V8 IT blocks to" + " a single instruction.">; + // Use the MachineScheduler for instruction scheduling for the subtarget. def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", "Use the MachineScheduler">; @@ -1147,7 +1152,8 @@ def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, FeatureHWDivThumb, FeatureHWDivARM, - FeatureDotProd]>; + FeatureDotProd, + FeatureDontRestrictIT]>; def : ProcNoItin<"cortex-a76", [ARMv82a, ProcA76, FeatureHWDivThumb, @@ -1155,7 +1161,8 @@ FeatureCrypto, FeatureCRC, FeatureFullFP16, - FeatureDotProd]>; + FeatureDotProd, + FeatureDontRestrictIT]>; def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76, FeatureHWDivThumb, @@ -1163,14 +1170,16 @@ FeatureCrypto, FeatureCRC, FeatureFullFP16, - FeatureDotProd]>; + FeatureDotProd, + FeatureDontRestrictIT]>; def : ProcNoItin<"neoverse-n1", [ARMv82a, FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, - FeatureDotProd]>; + FeatureDotProd, + FeatureDontRestrictIT]>; def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -669,10 +669,9 @@ if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) return false; - if (AFI->isThumb2Function()) { - if (getSubtarget().restrictIT()) - return isV8EligibleForIT(&MI); - } + const ARMSubtarget &ST = getSubtarget(); + if (AFI->isThumb2Function() && (ST.hasV8Ops() || ST.restrictIT())) + return isV8EligibleForIT(&MI, 
ST.restrictIT()); return true; } Index: llvm/lib/Target/ARM/ARMFeatures.h =================================================================== --- llvm/lib/Target/ARM/ARMFeatures.h +++ llvm/lib/Target/ARM/ARMFeatures.h @@ -21,10 +21,11 @@ bool IsCPSRDead(const InstrType *Instr); template // could be MachineInstr or MCInst -inline bool isV8EligibleForIT(const InstrType *Instr) { +inline bool isV8EligibleForIT(const InstrType *Instr, + bool Restricted = true) { switch (Instr->getOpcode()) { default: - return false; + return !Restricted; case ARM::tADC: case ARM::tADDi3: case ARM::tADDi8: Index: llvm/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.h +++ llvm/lib/Target/ARM/ARMSubtarget.h @@ -414,9 +414,18 @@ /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). bool StrictAlign = false; - /// RestrictIT - If true, the subtarget disallows generation of deprecated IT - /// blocks to conform to ARMv8 rule. - bool RestrictIT = false; + /// ForceRestrictIT - If true, the subtarget disallows generation of + /// deprecated IT blocks to conform to ARMv8 rule. + bool ForceRestrictIT = false; + + /// ForceNoRestrictIT - If true, the subtarget allows generation of + /// deprecated IT blocks, ignoring conformance to ARMv8 rule. + bool ForceNoRestrictIT = false; + + /// DontRestrictIT - If true, ignore the ARMv8 limitation of a single + /// Thumb1 instruction within an IT block, possibly allowing multiple + /// Thumb2 instructions too. + bool DontRestrictIT = false; /// HasDSP - If true, the subtarget supports the DSP (saturating arith /// and such) instructions. 
@@ -789,7 +798,13 @@ bool allowsUnalignedMem() const { return !StrictAlign; } - bool restrictIT() const { return RestrictIT; } + bool restrictIT() const { + if (ForceRestrictIT) + return true; + if (ForceNoRestrictIT) + return false; + return HasV8Ops && !DontRestrictIT; + } const std::string & getCPUString() const { return CPUString; } Index: llvm/lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.cpp +++ llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -237,13 +237,12 @@ switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops(); break; case RestrictedIT: - RestrictIT = true; + ForceRestrictIT = true; break; case NoRestrictedIT: - RestrictIT = false; + ForceNoRestrictIT = true; break; } Index: llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp =================================================================== --- llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -44,7 +44,7 @@ public: static char ID; - bool restrictIT; + bool AllowMultipleInsts = true; const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; @@ -227,8 +227,8 @@ unsigned Mask = 0, Pos = 3; // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it - // is set: skip the loop - if (!restrictIT) { + // is set or a core is optimised for them: skip the loop + if (AllowMultipleInsts) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. 
for (; MBBI != E && Pos && @@ -290,7 +290,7 @@ AFI = Fn.getInfo(); TII = static_cast(STI.getInstrInfo()); TRI = STI.getRegisterInfo(); - restrictIT = STI.restrictIT(); + AllowMultipleInsts = !STI.restrictIT(); if (!AFI->isThumbFunction()) return false; Index: llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll =================================================================== --- llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -2,6 +2,11 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a75 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a75 -enable-tail-merge=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76 -enable-tail-merge=0 | FileCheck %s + define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: t1: ; CHECK: ittt ne Index: llvm/test/CodeGen/Thumb2/v8_deprecate_IT.ll =================================================================== --- llvm/test/CodeGen/Thumb2/v8_deprecate_IT.ll +++ llvm/test/CodeGen/Thumb2/v8_deprecate_IT.ll @@ -2,6 +2,12 @@ ; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -o - | llvm-mc -triple thumbv7 --show-encoding 2>&1 | FileCheck %s --check-prefix=V7_RESTRICT_IT ; RUN: llc < %s -mtriple=thumbv8 -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8 ; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a75 -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76 -o - | llvm-mc -triple thumbv8 
--show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76ae -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=neoverse-n1 -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76 -arm-no-restrict-it -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT +; RUN: llc < %s -mtriple=thumbv8 -mcpu=cortex-a76 -arm-restrict-it -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8 ; V7-NOT: warning