[ARM] Make restrictIT a per-function query.

ARMSubtarget::restrictIT now takes the Function being compiled. When neither
-arm-restrict-it nor -arm-no-restrict-it is given on the command line, the
"arm-restrict-it" function attribute ("true"/"false") decides; otherwise, or
when the attribute is missing, the previous behavior is kept (restrict on v8).

Index: lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -569,7 +569,7 @@
     MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
 
   if (AFI->isThumb2Function()) {
-    if (getSubtarget().restrictIT())
+    if (getSubtarget().restrictIT(*MI->getParent()->getParent()->getFunction()))
       return isV8EligibleForIT(MI);
   } else { // non-Thumb
     if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
Index: lib/Target/ARM/ARMSubtarget.h
===================================================================
--- lib/Target/ARM/ARMSubtarget.h
+++ lib/Target/ARM/ARMSubtarget.h
@@ -195,10 +195,6 @@
   /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
   bool AllowsUnalignedMem;
 
-  /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
-  /// blocks to conform to ARMv8 rule.
-  bool RestrictIT;
-
   /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
   /// and such) instructions in Thumb2 code.
   bool Thumb2DSP;
@@ -417,7 +413,10 @@
 
   bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
 
-  bool restrictIT() const { return RestrictIT; }
+  /// Return true if generating deprecated IT blocks is not allowed for \p F.
+  /// The "arm-restrict-it" function attribute is honored unless overridden
+  /// on the command line.
+  bool restrictIT(const Function &F) const;
 
   const std::string & getCPUString() const { return CPUString; }
 
Index: lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- lib/Target/ARM/ARMSubtarget.cpp
+++ lib/Target/ARM/ARMSubtarget.cpp
@@ -22,6 +22,7 @@
 #include "Thumb1FrameLowering.h"
 #include "Thumb1InstrInfo.h"
 #include "Thumb2InstrInfo.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
@@ -251,18 +252,6 @@
   if (isV6M())
     AllowsUnalignedMem = false;
 
-  switch (IT) {
-  case DefaultIT:
-    RestrictIT = hasV8Ops();
-    break;
-  case RestrictedIT:
-    RestrictIT = true;
-    break;
-  case NoRestrictedIT:
-    RestrictIT = false;
-    break;
-  }
-
   // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
   const FeatureBitset &Bits = getFeatureBits();
   if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
@@ -328,6 +317,34 @@
   return false;
 }
 
+bool ARMSubtarget::restrictIT(const Function &F) const {
+  // The default behavior is to disallow generation of deprecated IT blocks
+  // if the target architecture is armv8.
+  const bool Default = hasV8Ops();
+
+  // Without an explicit command-line option, the "arm-restrict-it" function
+  // attribute decides; any value other than "true"/"false" (including a
+  // missing attribute) falls back to the default.
+  if (IT.getNumOccurrences() == 0) {
+    StringRef Val = F.getFnAttribute("arm-restrict-it").getValueAsString();
+    return StringSwitch<bool>(Val)
+        .Case("true", true)
+        .Case("false", false)
+        .Default(Default);
+  }
+
+  // The command-line option overrides the function attribute.
+  switch (IT) {
+  case RestrictedIT:
+    return true;
+  case NoRestrictedIT:
+    return false;
+  case DefaultIT:
+    break;
+  }
+  return Default;
+}
+
 unsigned ARMSubtarget::getMispredictionPenalty() const {
   return SchedModel.MispredictPenalty;
 }
Index: lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- lib/Target/ARM/ARMTargetMachine.cpp
+++ lib/Target/ARM/ARMTargetMachine.cpp
@@ -388,7 +388,7 @@
   if (getOptLevel() != CodeGenOpt::None) {
     // in v8, IfConversion depends on Thumb instruction widths
     addPass(createThumb2SizeReductionPass([this](const Function &F) {
-      return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
+      return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(F);
     }));
 
     addPass(createIfConverter([this](const Function &F) {
Index: lib/Target/ARM/Thumb2ITBlockPass.cpp
===================================================================
--- lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -274,7 +274,7 @@
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
   TRI = STI.getRegisterInfo();
-  restrictIT = STI.restrictIT();
+  restrictIT = STI.restrictIT(*Fn.getFunction());
 
   if (!AFI->isThumbFunction())
     return false;
Index: test/CodeGen/Thumb2/restict-it-fnattr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb2/restict-it-fnattr.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=thumbv7 -mcpu=cortex-a8 | FileCheck %s -check-prefix=NO-OPTION
+; RUN: llc < %s -mtriple=thumbv7 -mcpu=cortex-a8 -arm-no-restrict-it | FileCheck %s -check-prefix=NO-RESTRICT-IT
+; RUN: llc < %s -mtriple=thumbv7 -mcpu=cortex-a8 -arm-restrict-it | FileCheck %s -check-prefix=RESTRICT-IT
+
+; NO-OPTION-LABEL: {{\_?}}no_restrict_it:
+; NO-OPTION: itt
+
+; NO-RESTRICT-IT-LABEL: {{\_?}}no_restrict_it:
+; NO-RESTRICT-IT: itt
+
+; RESTRICT-IT-LABEL: {{\_?}}no_restrict_it:
+; RESTRICT-IT: it{{ }}
+
+define i32 @no_restrict_it(i32 %a, i32 %b) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %sub = add nsw i32 %a, -1
+  %add = add nsw i32 %b, 2
+  br label %if.end
+
+if.end:
+  %t0.0 = phi i32 [ %sub, %if.then ], [ %a, %entry ]
+  %t1.0 = phi i32 [ %add, %if.then ], [ %b, %entry ]
+  %mul = mul nsw i32 %t1.0, %t0.0
+  ret i32 %mul
+}
+
+; NO-OPTION-LABEL: {{\_?}}restrict_it:
+; NO-OPTION: it{{ }}
+
+; NO-RESTRICT-IT-LABEL: {{\_?}}restrict_it:
+; NO-RESTRICT-IT: itt
+
+; RESTRICT-IT-LABEL: {{\_?}}restrict_it:
+; RESTRICT-IT: it{{ }}
+
+define i32 @restrict_it(i32 %a, i32 %b) #1 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %sub = add nsw i32 %a, -1
+  %add = add nsw i32 %b, 2
+  br label %if.end
+
+if.end:
+  %t0.0 = phi i32 [ %sub, %if.then ], [ %a, %entry ]
+  %t1.0 = phi i32 [ %add, %if.then ], [ %b, %entry ]
+  %mul = mul nsw i32 %t1.0, %t0.0
+  ret i32 %mul
+}
+
+attributes #0 = { "arm-restrict-it"="false" }
+attributes #1 = { "arm-restrict-it"="true" }