Index: lib/Target/ARM/ARM.td
===================================================================
--- lib/Target/ARM/ARM.td
+++ lib/Target/ARM/ARM.td
@@ -106,6 +106,37 @@
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
+// Whether or not it may be profitable to unpredicate certain instructions
+// during if conversion.
+def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
+                                              "IsProfitableToUnpredicate",
+                                              "true",
+                                              "Is profitable to unpredicate">;
+
+// Some targets (e.g. Swift) have microcoded VGETLNi32.
+def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
+                                            "HasSlowVGETLNi32", "true",
+                                            "Has slow VGETLNi32 - prefer VMOV">;
+
+// Some targets (e.g. Swift) have microcoded VDUP32.
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
+                                         "Has slow VDUP32 - prefer VMOV">;
+
+// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
+// for scalar FP, as this allows more effective execution domain optimization.
+def FeaturePreferVMOVSR : SubtargetFeature<"pref-vmovsr", "PreferVMOVSR",
+                                           "true", "Prefer VMOVSR">;
+
+// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
+// than ISH
+def FeaturePrefISHSTBarrier : SubtargetFeature<"pref-ishst", "PreferISHST",
+                                               "true", "Prefer ISHST barriers">;
+
+// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
+// VFP to NEON.
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
+                                            "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
+
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
@@ -533,6 +564,8 @@
                                                   FeatureT2XtPk,
                                                   FeatureFP16,
                                                   FeatureAvoidPartialCPSR,
+                                                  FeaturePreferVMOVSR,
+                                                  FeatureNEONForFPMovs,
                                                   FeatureMP]>;
 
 // FIXME: A12 has currently the same Schedule model as A9
@@ -596,7 +629,11 @@
                                            FeatureHWDivARM,
                                            FeatureAvoidPartialCPSR,
                                            FeatureAvoidMOVsShOp,
-                                           FeatureHasSlowFPVMLx]>;
+                                           FeatureHasSlowFPVMLx,
+                                           FeatureProfUnpredicate,
+                                           FeaturePrefISHSTBarrier,
+                                           FeatureSlowVGETLNi32,
+                                           FeatureSlowVDUP32]>;
 
 // FIXME: R4 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
Index: lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1766,9 +1766,9 @@
 
 bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                                  MachineBasicBlock &FMBB) const {
-  // Reduce false anti-dependencies to let Swift's out-of-order execution
+  // Reduce false anti-dependencies to let the target's out-of-order execution
   // engine do its thing.
-  return Subtarget.isSwift();
+  return Subtarget.isProfitableToUnpredicate();
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -4178,7 +4178,7 @@
   // CortexA9 is particularly picky about mixing the two and wants these
   // converted.
-  if (Subtarget.isCortexA9() && !isPredicated(*MI) &&
+  if (Subtarget.useNEONForFPMovs() && !isPredicated(*MI) &&
       (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
        MI->getOpcode() == ARM::VMOVS))
     return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -3032,7 +3032,8 @@
   if (Subtarget->isMClass()) {
     // Only a full system barrier exists in the M-class architectures.
     Domain = ARM_MB::SY;
-  } else if (Subtarget->isSwift() && Ord == AtomicOrdering::Release) {
+  } else if (Subtarget->preferISHSTBarriers() &&
+             Ord == AtomicOrdering::Release) {
     // Swift happens to implement ISHST barriers in a way that's compatible with
     // Release semantics but weaker than ISH so we'd be fools not to use
     // it. Beware: other processors probably don't!
@@ -12166,7 +12167,7 @@
     /*FALLTHROUGH*/
   case AtomicOrdering::Release:
   case AtomicOrdering::AcquireRelease:
-    if (Subtarget->isSwift())
+    if (Subtarget->preferISHSTBarriers())
       return makeDMB(Builder, ARM_MB::ISHST);
     // FIXME: add a comment with a link to documentation justifying this.
     else
Index: lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- lib/Target/ARM/ARMInstrInfo.td
+++ lib/Target/ARM/ARMInstrInfo.td
@@ -320,19 +320,14 @@
                                  " Subtarget->hasVFP4()) || "
                                  "Subtarget->isTargetDarwin()">;
 
-// VGETLNi32 is microcoded on Swift - prefer VMOV.
-def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
-
-// VDUP.32 is microcoded on Swift - prefer VMOV.
-def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
-
-// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
-// this allows more effective execution domain optimization. See
-// setExecutionDomain().
-def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
+
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
+
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() && Subtarget->useNEONForSinglePrecisionFP()">;
 
 def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
 def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
Index: lib/Target/ARM/ARMSubtarget.h
===================================================================
--- lib/Target/ARM/ARMSubtarget.h
+++ lib/Target/ARM/ARMSubtarget.h
@@ -218,6 +218,24 @@
   /// particularly effective at zeroing a VFP register.
   bool HasZeroCycleZeroing;
 
+  /// If true, if conversion may decide to leave some instructions unpredicated.
+  bool IsProfitableToUnpredicate;
+
+  /// If true, VMOV will be favored over VGETLNi32.
+  bool HasSlowVGETLNi32;
+
+  /// If true, VMOV will be favored over VDUP.
+  bool HasSlowVDUP32;
+
+  /// If true, VMOVSR will be favored over VMOVDRR.
+  bool PreferVMOVSR;
+
+  /// If true, ISHST barriers will be used for Release semantics.
+  bool PreferISHST;
+
+  /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
+  bool UseNEONForFPMovs;
+
   /// StrictAlign - If true, the subtarget disallows unaligned memory
   /// accesses for some types. For details, see
   /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
@@ -376,6 +394,12 @@
   bool hasTrustZone() const { return HasTrustZone; }
   bool has8MSecExt() const { return Has8MSecExt; }
   bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+  bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
+  bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
+  bool hasSlowVDUP32() const { return HasSlowVDUP32; }
+  bool preferVMOVSR() const { return PreferVMOVSR; }
+  bool preferISHSTBarriers() const { return PreferISHST; }
+  bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
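
Not shown in this excerpt is the matching ARMSubtarget.cpp change: the new member flags also need default values wherever the other feature booleans are initialized (ARMSubtarget::initializeEnvironment() in trees of this vintage). A minimal sketch, assuming that is still the place where per-feature flags get their defaults:

  // ARMSubtarget.cpp (sketch, not part of the diff above)
  void ARMSubtarget::initializeEnvironment() {
    // ... existing flag initializations ...
    HasZeroCycleZeroing = false;
    // New flags default to off; the SubtargetFeatures in ARM.td turn them on
    // for the processors that list them (Cortex-A9 and Swift above).
    IsProfitableToUnpredicate = false;
    HasSlowVGETLNi32 = false;
    HasSlowVDUP32 = false;
    PreferVMOVSR = false;
    PreferISHST = false;
    UseNEONForFPMovs = false;
    // ... remaining flag initializations ...
  }

Since these are ordinary SubtargetFeatures, they can also be toggled independently of -mcpu for testing, e.g. llc -mattr=+slow-vgetlni32,+slow-vdup32,+pref-ishst.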