Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -276,12 +276,28 @@
                                FeatureSSE1, FeatureFXSR]>;
 def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
                                FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
-def : Proc<"pentium-m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
-def : Proc<"pentium4",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE2, FeatureFXSR]>;
-def : Proc<"pentium4m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+
+// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
+// The intent is to enable it for pentium4 which is the current default
+// processor in a vanilla 32-bit clang compilation when no specific
+// architecture is specified. This generally gives a nice performance
+// increase on silvermont, with largely neutral behavior on other
+// contemporary large core processors.
+// pentium-m, pentium4m, prescott and nocona are included as a preventative
+// measure to avoid performance surprises, in case clang's default cpu
+// changes slightly.
+
+def : ProcessorModel<"pentium-m", GenericPostRAModel,
+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+
+def : ProcessorModel<"pentium4", GenericPostRAModel,
+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+                      FeatureSSE2, FeatureFXSR]>;
+
+def : ProcessorModel<"pentium4m", GenericPostRAModel,
+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+                      FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
 
 // Intel Quark.
 def : Proc<"lakemont", []>;
@@ -292,10 +308,10 @@
                                FeatureFXSR, FeatureSlowBTMem]>;
 
 // NetBurst.
-def : Proc<"prescott",
-           [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-            FeatureFXSR, FeatureSlowBTMem]>;
-def : Proc<"nocona", [
+def : ProcessorModel<"prescott", GenericPostRAModel,
+                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
+                      FeatureFXSR, FeatureSlowBTMem]>;
+def : ProcessorModel<"nocona", GenericPostRAModel, [
   FeatureX87,
   FeatureSlowUAMem16,
   FeatureMMX,
Index: lib/Target/X86/X86Schedule.td
===================================================================
--- lib/Target/X86/X86Schedule.td
+++ lib/Target/X86/X86Schedule.td
@@ -633,8 +633,9 @@
 // latencies. Since these latencies are not used for pipeline hazards,
 // they do not need to be exact.
 //
-// The GenericModel contains no instruction itineraries.
-def GenericModel : SchedMachineModel {
+// The GenericX86Model contains no instruction itineraries
+// and disables PostRAScheduler.
+class GenericX86Model : SchedMachineModel {
   let IssueWidth = 4;
   let MicroOpBufferSize = 32;
   let LoadLatency = 4;
@@ -643,6 +644,13 @@
   let CompleteModel = 0;
 }
 
+def GenericModel : GenericX86Model;
+
+// Define a model with the PostRAScheduler enabled.
+def GenericPostRAModel : GenericX86Model {
+  let PostRAScheduler = 1;
+}
+
 include "X86ScheduleAtom.td"
 include "X86SchedSandyBridge.td"
 include "X86SchedHaswell.td"
Index: test/CodeGen/X86/machine-cp.ll
===================================================================
--- test/CodeGen/X86/machine-cp.ll
+++ test/CodeGen/X86/machine-cp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona -verify-machineinstrs -post-RA-scheduler=false < %s | FileCheck %s
 
 ; After tail duplication, two copies in an early exit BB can be cancelled out.
 ; rdar://10640363
Index: test/CodeGen/X86/misched-ilp.ll
===================================================================
--- test/CodeGen/X86/misched-ilp.ll
+++ test/CodeGen/X86/misched-ilp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax -post-RA-scheduler=false | FileCheck -check-prefix=MAX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin -post-RA-scheduler=false | FileCheck -check-prefix=MIN %s
 ;
 ; Basic verification of the ScheduleDAGILP metric.
 ;
Index: test/CodeGen/X86/post-ra-sched.ll
===================================================================
--- test/CodeGen/X86/post-ra-sched.ll
+++ test/CodeGen/X86/post-ra-sched.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4 | FileCheck %s
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium4m | FileCheck %s
+; RUN: llc < %s -mtriple=i386 -mcpu=pentium-m | FileCheck %s
+; RUN: llc < %s -mtriple=i386 -mcpu=prescott | FileCheck %s
+; RUN: llc < %s -mtriple=i386 -mcpu=nocona | FileCheck %s
+;
+; Verify that scheduling puts some distance between a load feeding into
+; the address of another load, and that second load. This currently
+; happens during the post-RA-scheduler, which should be enabled by
+; default with the above specified cpus.
+
+@ptrs = external global [0 x i32*], align 4
+@idxa = common global i32 0, align 4
+@idxb = common global i32 0, align 4
+@res = common global i32 0, align 4
+
+define void @addindirect() {
+; CHECK-LABEL: addindirect:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl idxb, %ecx
+; CHECK-NEXT:    movl idxa, %eax
+; CHECK-NEXT:    movl ptrs(,%ecx,4), %ecx
+; CHECK-NEXT:    movl ptrs(,%eax,4), %eax
+; CHECK-NEXT:    movl (%ecx), %ecx
+; CHECK-NEXT:    addl (%eax), %ecx
+; CHECK-NEXT:    movl %ecx, res
+; CHECK-NEXT:    retl
+entry:
+  %0 = load i32, i32* @idxa, align 4
+  %arrayidx = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %0
+  %1 = load i32*, i32** %arrayidx, align 4
+  %2 = load i32, i32* %1, align 4
+  %3 = load i32, i32* @idxb, align 4
+  %arrayidx1 = getelementptr inbounds [0 x i32*], [0 x i32*]* @ptrs, i32 0, i32 %3
+  %4 = load i32*, i32** %arrayidx1, align 4
+  %5 = load i32, i32* %4, align 4
+  %add = add i32 %5, %2
+  store i32 %add, i32* @res, align 4
+  ret void
+}
Index: test/CodeGen/X86/pr16360.ll
===================================================================
--- test/CodeGen/X86/pr16360.ll
+++ test/CodeGen/X86/pr16360.ll
@@ -5,9 +5,9 @@
 ; CHECK-LABEL: foo:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
 ; CHECK-NEXT:    shrl $2, %eax
 ; CHECK-NEXT:    orl $-67108864, %eax # imm = 0xFFFFFFFFFC000000
-; CHECK-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
 ; CHECK-NEXT:    retl
 entry:
   %conv = sext i32 %sum to i64
Index: test/CodeGen/X86/sse2.ll
===================================================================
--- test/CodeGen/X86/sse2.ll
+++ test/CodeGen/X86/sse2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; Tests for SSE2 and below, without SSE3+.
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
@@ -5,8 +6,8 @@
 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movapd (%ecx), %xmm0
 ; CHECK-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
 ; CHECK-NEXT:    movapd %xmm0, (%eax)
@@ -21,8 +22,8 @@
 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movapd (%ecx), %xmm0
 ; CHECK-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; CHECK-NEXT:    movapd %xmm0, (%eax)
@@ -38,9 +39,9 @@
 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movaps (%edx), %xmm0
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
@@ -75,9 +76,9 @@
 ; CHECK-LABEL: test5:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    pxor %xmm0, %xmm0
 ; CHECK-NEXT:    movl (%eax), %eax
 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    pxor %xmm0, %xmm0
 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; CHECK-NEXT:    retl
@@ -99,8 +100,8 @@
 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
 ; CHECK-LABEL: test6:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movaps (%ecx), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 ; CHECK-NEXT:    retl
@@ -181,8 +182,8 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movapd 0, %xmm0
 ; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; CHECK-NEXT:    xorpd %xmm2, %xmm2
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
 ; CHECK-NEXT:    addps %xmm1, %xmm0
 ; CHECK-NEXT:    movaps %xmm0, 0
@@ -198,9 +199,9 @@
 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ; CHECK-LABEL: test13:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movaps (%edx), %xmm0
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
@@ -218,11 +219,11 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movaps (%ecx), %xmm1
 ; CHECK-NEXT:    movaps (%eax), %xmm2
+; CHECK-NEXT:    movaps (%ecx), %xmm1
 ; CHECK-NEXT:    movaps %xmm2, %xmm0
-; CHECK-NEXT:    addps %xmm1, %xmm0
 ; CHECK-NEXT:    subps %xmm1, %xmm2
+; CHECK-NEXT:    addps %xmm1, %xmm0
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; CHECK-NEXT:    retl
 	%tmp = load <4 x float>, <4 x float>* %y		; <<4 x float>> [#uses=2]
@@ -236,8 +237,8 @@
 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
 ; CHECK-LABEL: test15:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movapd (%ecx), %xmm0
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
 ; CHECK-NEXT:    retl
@@ -316,9 +317,9 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pmuludq %xmm1, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; CHECK-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-NEXT:    retl
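
A quick local check of the behavioral change, as a sketch using only flags that already appear in this patch: run llc on the new test with one of the affected CPUs, once with the new default and once with the -post-RA-scheduler=false override used by the updated tests above. With the default, the address-forming loads are expected to be interleaved with the dependent loads as checked in post-ra-sched.ll; with the override, the pre-RA ordering should remain.

  llc < test/CodeGen/X86/post-ra-sched.ll -mtriple=i386 -mcpu=pentium4
  llc < test/CodeGen/X86/post-ra-sched.ll -mtriple=i386 -mcpu=pentium4 -post-RA-scheduler=false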