diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -415,10 +415,6 @@
     "DisablePostRAScheduler", "true",
     "Don't schedule again after register allocation">;
 
-// Enable use of alias analysis during code generation
-def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
-                                    "Use alias analysis during codegen">;
-
 // Armv8.5-A extensions
 
 def FeatureSB       : SubtargetFeature<"sb", "HasSB", "true",
@@ -584,7 +580,6 @@
                                    "Samsung Exynos processors",
                                    [FeatureZCZeroing,
                                     FeatureUseWideStrideVFP,
-                                    FeatureUseAA,
                                     FeatureSplatVFPToNeon,
                                     FeatureSlowVGETLNi32,
                                     FeatureSlowVDUP32,
@@ -1067,13 +1062,11 @@
                                                          ProcM3,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureUseMISched,
-                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcessorModel<"sc300",       CortexM4Model,      [ARMv7m,
                                                          ProcM3,
                                                          FeatureUseMISched,
-                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcessorModel<"cortex-m4", CortexM4Model,        [ARMv7em,
@@ -1081,7 +1074,6 @@
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
-                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcNoItin<"cortex-m7",                           [ARMv7em,
@@ -1096,7 +1088,6 @@
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
-                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcessorModel<"cortex-m35p", CortexM4Model,      [ARMv8mMainline,
@@ -1105,7 +1096,6 @@
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
-                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 
@@ -1213,8 +1203,7 @@
 
 def : ProcessorModel<"cortex-r52", CortexR52Model,      [ARMv8r, ProcR52,
                                                          FeatureUseMISched,
-                                                         FeatureFPAO,
-                                                         FeatureUseAA]>;
+                                                         FeatureFPAO]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -223,9 +223,6 @@
   /// register allocation.
   bool DisablePostRAScheduler = false;
 
-  /// UseAA - True if using AA during codegen (DAGCombine, MISched, etc)
-  bool UseAA = false;
-
   /// HasThumb2 - True if Thumb2 instructions are supported.
   bool HasThumb2 = false;
 
@@ -811,7 +808,7 @@
 
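-  /// Enable use of alias analysis during code generation (during MI
-  /// scheduling, DAGCombine, etc.).
+  /// Alias analysis is always used during code generation (MI scheduling,
+  /// DAGCombine, etc.); it is no longer gated on a subtarget feature.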
-  bool useAA() const override { return UseAA; }
+  bool useAA() const override { return true; }
 
   // enableAtomicExpand- True if we need to expand our atomics.
   bool enableAtomicExpand() const override;
diff --git a/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll b/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
--- a/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
@@ -36,10 +36,10 @@
 ; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
 ; CHECKV6-NEXT: ldm{{(\.w)?}} [[LB]]!,
 ; CHECKV6-NEXT: stm{{(\.w)?}} [[SB]]!,
-; CHECKV6-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
-; CHECKV6-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
-; CHECKV6-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
-; CHECKV6-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+; CHECKV6-DAG: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
+; CHECKV6-DAG: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
+; CHECKV6-DAG: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
+; CHECKV6-DAG: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
 ; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
 ; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
 ; CHECKV7: ldr{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #11]
diff --git a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
--- a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -57,14 +57,14 @@
 
 ; Epilogue
 ; --------
-; CHECK-V4T:         ldr [[POP:r[4567]]], [sp, #16]
+; CHECK-V4T:         ldr [[POP:r[4567]]], [sp, #12]
 ; CHECK-V4T-NEXT:    mov lr, [[POP]]
 ; CHECK-V4T-NEXT:    pop {[[SAVED]]}
 ; CHECK-V4T-NEXT:    add sp, #16
 ; CHECK-V4T-NEXT:    bx  lr
 ; CHECK-V5T:         lsls r4
 ; CHECK-V5T-NEXT:    mov sp, r4
-; CHECK-V5T:         ldr [[POP:r[4567]]], [sp, #16]
+; CHECK-V5T:         ldr [[POP:r[4567]]], [sp, #12]
 ; CHECK-V5T-NEXT:    mov lr, [[POP]]
 ; CHECK-V5T-NEXT:    pop {[[SAVED]]}
 ; CHECK-V5T-NEXT:    add sp, #16
diff --git a/llvm/test/CodeGen/ARM/useaa.ll b/llvm/test/CodeGen/ARM/useaa.ll
--- a/llvm/test/CodeGen/ARM/useaa.ll
+++ b/llvm/test/CodeGen/ARM/useaa.ll
@@ -7,9 +7,9 @@
 
 ; CHECK-LABEL: test
 ; GENERIC: ldr
-; GENERIC: str
 ; GENERIC: ldr
 ; GENERIC: str
+; GENERIC: str
 ; USEAA: ldr
 ; USEAA: ldr
 ; USEAA: str
diff --git a/llvm/test/CodeGen/ARM/va_arg.ll b/llvm/test/CodeGen/ARM/va_arg.ll
--- a/llvm/test/CodeGen/ARM/va_arg.ll
+++ b/llvm/test/CodeGen/ARM/va_arg.ll
@@ -1,13 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
 ; Test that we correctly align elements when using va_arg
 
-; CHECK-LABEL: test1:
-; CHECK-NOT: bfc
-; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
-; CHECK: bic	{{(r[0-9]+)|(lr)}}, [[REG]], #7
-; CHECK-NOT: bic
-
 define i64 @test1(i32 %i, ...) nounwind optsize {
+; CHECK-LABEL: test1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, sp, #12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    stmib sp, {r1, r2, r3}
+; CHECK-NEXT:    add r0, r0, #7
+; CHECK-NEXT:    bic r1, r0, #7
+; CHECK-NEXT:    orr r2, r1, #4
+; CHECK-NEXT:    str r2, [sp]
+; CHECK-NEXT:    ldr r0, [r1]
+; CHECK-NEXT:    add r2, r2, #4
+; CHECK-NEXT:    str r2, [sp]
+; CHECK-NEXT:    ldr r1, [r1, #4]
+; CHECK-NEXT:    add sp, sp, #4
+; CHECK-NEXT:    add sp, sp, #12
+; CHECK-NEXT:    bx lr
 entry:
   %g = alloca i8*, align 4
   %g1 = bitcast i8** %g to i8*
@@ -17,14 +31,25 @@
   ret i64 %0
 }
 
-; CHECK-LABEL: test2:
-; CHECK-NOT: bfc
-; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
-; CHECK: bic	{{(r[0-9]+)|(lr)}}, [[REG]], #7
-; CHECK-NOT:	bic
-; CHECK: bx	lr
-
 define double @test2(i32 %a, i32* %b, ...) nounwind optsize {
+; CHECK-LABEL: test2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, sp, #8
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    stmib sp, {r2, r3}
+; CHECK-NEXT:    add r0, r0, #11
+; CHECK-NEXT:    bic r0, r0, #3
+; CHECK-NEXT:    str r2, [r1]
+; CHECK-NEXT:    add r1, r0, #8
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    add sp, sp, #4
+; CHECK-NEXT:    add sp, sp, #8
+; CHECK-NEXT:    bx lr
 entry:
   %ap = alloca i8*, align 4                       ; <i8**> [#uses=3]
   %ap1 = bitcast i8** %ap to i8*                  ; <i8*> [#uses=2]