Index: llvm/lib/Target/ARM/A15SDOptimizer.cpp
===================================================================
--- llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -660,8 +660,9 @@
   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
   // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
   // enabled when NEON is available.
-  if (!(STI.isCortexA15() && STI.hasNEON()))
+  if (!(STI.useStrictVFPWidth() && STI.hasNEON()))
     return false;
+
   TII = STI.getInstrInfo();
   TRI = STI.getRegisterInfo();
   MRI = &Fn.getRegInfo();
Index: llvm/lib/Target/ARM/ARM.td
===================================================================
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -195,6 +195,13 @@
                                              "DontWidenVMOVS", "true",
                                              "Don't widen VMOVS to VMOVD">;
 
+// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
+// VFP register widths.
+def FeatureStrictVFPWidth : SubtargetFeature<"strict-vfp-sd",
+                                             "StrictVFPWidth", "true",
+                                             "Use VFP register width strictly",
+                                             [FeatureDontWidenVMOVS]>;
+
 // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
 def FeatureExpandMLx      : SubtargetFeature<"expand-fp-mlx",
                                              "ExpandMLx", "true",
@@ -819,6 +826,7 @@
 
 def : ProcessorModel<"cortex-a15",  CortexA9Model,      [ARMv7a, ProcA15,
                                                          FeatureDontWidenVMOVS,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHasRetAddrStack,
                                                          FeatureMuxedUnits,
                                                          FeatureTrustZone,
@@ -1006,24 +1014,28 @@
                                                          FeatureNoPostRASched]>;
 
 def : ProcNoItin<"exynos-m1",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m2",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m3",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
                                                          FeatureCRC]>;
 
 def : ProcNoItin<"exynos-m4",                           [ARMv8a, ProcExynosM1,
+                                                         FeatureStrictVFPWidth,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
                                                          FeatureCrypto,
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -352,9 +352,12 @@
   /// If true, the AGU and NEON/FPU units are multiplexed.
   bool HasMuxedUnits = false;
 
-  /// If true, VMOVS will never be widened to VMOVD
+  /// If true, VMOVS will never be widened to VMOVD.
   bool DontWidenVMOVS = false;
 
+  /// If true, use VFP register width strictly.
+  bool StrictVFPWidth = false;
+
   /// If true, run the MLx expansion pass.
   bool ExpandMLx = false;
 
@@ -591,6 +594,7 @@
   bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
   bool hasMuxedUnits() const { return HasMuxedUnits; }
   bool dontWidenVMOVS() const { return DontWidenVMOVS; }
+  bool useStrictVFPWidth() const { return StrictVFPWidth; }
   bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
   bool nonpipelinedVFP() const { return NonpipelinedVFP; }