[ARM] Gate the A15SDOptimizer pass on a subtarget feature, not a CPU check.

Summary of this patch:
  * A15SDOptimizer.cpp: the pass now runs when the subtarget reports
    useStrictVFPWidth() (and NEON), instead of testing isCortexA15().
  * ARM.td: defines FeatureStrictVFPWidth ("strict-vfp-sd"), which implies
    FeatureDontWidenVMOVS, and adds it to cortex-a15 and exynos-m1..m4.
  * ARMSubtarget.h: adds the StrictVFPWidth flag and its accessor.

NOTE(review): line breaks and column alignment of this patch were
reconstructed from a whitespace-collapsed copy; hunk line counts were checked
against the @@ headers, but if context whitespace does not match the tree,
apply with `patch -l` (ignore whitespace).

Index: llvm/lib/Target/ARM/A15SDOptimizer.cpp
===================================================================
--- llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -660,8 +660,9 @@
   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
   // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
   // enabled when NEON is available.
-  if (!(STI.isCortexA15() && STI.hasNEON()))
+  if (!(STI.useStrictVFPWidth() && STI.hasNEON()))
     return false;
+
   TII = STI.getInstrInfo();
   TRI = STI.getRegisterInfo();
   MRI = &Fn.getRegInfo();
Index: llvm/lib/Target/ARM/ARM.td
===================================================================
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -195,6 +195,13 @@
                                              "DontWidenVMOVS", "true",
                                              "Don't widen VMOVS to VMOVD">;
 
+// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
+// VFP register widths.
+def FeatureStrictVFPWidth : SubtargetFeature<"strict-vfp-sd",
+                                             "StrictVFPWidth", "true",
+                                             "Use VFP register width strictly",
+                                             [FeatureDontWidenVMOVS]>;
+
 // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
 def FeatureExpandMLx      : SubtargetFeature<"expand-fp-mlx",
                                              "ExpandMLx", "true",
@@ -819,6 +826,7 @@
 
 def : ProcessorModel<"cortex-a15",  CortexA9Model,      [ARMv7a, ProcA15,
                                                          FeatureDontWidenVMOVS,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHasRetAddrStack,
                                                          FeatureMuxedUnits,
                                                          FeatureTrustZone,
@@ -1006,24 +1014,28 @@
                                                          FeatureNoPostRASched]>;
 
 def : ProcNoItin<"exynos-m1",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m2",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m3",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m4",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -352,9 +352,12 @@
   /// If true, the AGU and NEON/FPU units are multiplexed.
   bool HasMuxedUnits = false;
 
-  /// If true, VMOVS will never be widened to VMOVD
+  /// If true, VMOVS will never be widened to VMOVD.
   bool DontWidenVMOVS = false;
 
+  /// If true, use VFP register width strictly.
+  bool StrictVFPWidth = false;
+
   /// If true, run the MLx expansion pass.
   bool ExpandMLx = false;
 
@@ -591,6 +594,7 @@
   bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
   bool hasMuxedUnits() const { return HasMuxedUnits; }
   bool dontWidenVMOVS() const { return DontWidenVMOVS; }
+  bool useStrictVFPWidth() const { return StrictVFPWidth; }
   bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
   bool nonpipelinedVFP() const { return NonpipelinedVFP; }