Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1198,6 +1198,11 @@
   Level = AtLevel;
   LegalOperations = Level >= AfterLegalizeVectorOps;
   LegalTypes = Level >= AfterLegalizeTypes;
+  bool LegalizeOps = Level == AfterLegalizeDAG;
+  bool DoCombine = OptLevel != CodeGenOpt::None;
+
+  if (!DoCombine && !LegalizeOps)
+    return;
 
   // Add all the dag nodes to the worklist.
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
@@ -1233,7 +1238,7 @@
 
     // If this combine is running after legalizing the DAG, re-legalize any
     // nodes pulled off the worklist.
-    if (Level == AfterLegalizeDAG) {
+    if (LegalizeOps) {
       SmallSetVector<SDNode *, 16> UpdatedNodes;
       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
@@ -1245,52 +1250,54 @@
       continue;
     }
 
-    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
-
-    // Add any operands of the new node which have not yet been combined to the
-    // worklist as well. Because the worklist uniques things already, this
-    // won't repeatedly process the same operand.
-    CombinedNodes.insert(N);
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (!CombinedNodes.count(N->getOperand(i).getNode()))
-        AddToWorklist(N->getOperand(i).getNode());
+    if (DoCombine) {
+      DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
 
-    SDValue RV = combine(N);
+      // Add any operands of the new node which have not yet been combined to the
+      // worklist as well. Because the worklist uniques things already, this
+      // won't repeatedly process the same operand.
+      CombinedNodes.insert(N);
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        if (!CombinedNodes.count(N->getOperand(i).getNode()))
+          AddToWorklist(N->getOperand(i).getNode());
 
-    if (!RV.getNode())
-      continue;
+      SDValue RV = combine(N);
 
-    ++NodesCombined;
+      if (!RV.getNode())
+        continue;
 
-    // If we get back the same node we passed in, rather than a new node or
-    // zero, we know that the node must have defined multiple values and
-    // CombineTo was used. Since CombineTo takes care of the worklist
-    // mechanics for us, we have no work to do in this case.
-    if (RV.getNode() == N)
-      continue;
+      ++NodesCombined;
 
-    assert(N->getOpcode() != ISD::DELETED_NODE &&
-           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
-           "Node was deleted but visit returned new node!");
+      // If we get back the same node we passed in, rather than a new node or
+      // zero, we know that the node must have defined multiple values and
+      // CombineTo was used. Since CombineTo takes care of the worklist
+      // mechanics for us, we have no work to do in this case.
+      if (RV.getNode() == N)
+        continue;
 
-    DEBUG(dbgs() << " ... into: ";
-          RV.getNode()->dump(&DAG));
+      assert(N->getOpcode() != ISD::DELETED_NODE &&
+             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+             "Node was deleted but visit returned new node!");
+
+      DEBUG(dbgs() << " ... into: ";
+            RV.getNode()->dump(&DAG));
+
+      // Transfer debug value.
+      DAG.TransferDbgValues(SDValue(N, 0), RV);
+      if (N->getNumValues() == RV.getNode()->getNumValues())
+        DAG.ReplaceAllUsesWith(N, RV.getNode());
+      else {
+        assert(N->getValueType(0) == RV.getValueType() &&
+               N->getNumValues() == 1 && "Type mismatch");
+        SDValue OpV = RV;
+        DAG.ReplaceAllUsesWith(N, &OpV);
+      }
 
-    // Transfer debug value.
-    DAG.TransferDbgValues(SDValue(N, 0), RV);
-    if (N->getNumValues() == RV.getNode()->getNumValues())
-      DAG.ReplaceAllUsesWith(N, RV.getNode());
-    else {
-      assert(N->getValueType(0) == RV.getValueType() &&
-             N->getNumValues() == 1 && "Type mismatch");
-      SDValue OpV = RV;
-      DAG.ReplaceAllUsesWith(N, &OpV);
+      // Push the new node and any users onto the worklist
+      AddToWorklist(RV.getNode());
+      AddUsersToWorklist(RV.getNode());
     }
 
-    // Push the new node and any users onto the worklist
-    AddToWorklist(RV.getNode());
-    AddUsersToWorklist(RV.getNode());
-
     // Finally, if the node is now dead, remove it from the graph. The node
     // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2565,14 +2565,9 @@
       ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
       SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-      if (Subtarget->isThumb()) {
-        SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
-        return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
-      } else {
-        SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
-                          Reg0 };
-        return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
-      }
+      unsigned Opc = (Subtarget->isThumb() ? ARM::t2ADDrs : ARM::ADDrsi);
+      SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
     }
     if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
       unsigned ShImm = Log2_32(RHSV+1);
@@ -2582,14 +2577,9 @@
       ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
       SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-      if (Subtarget->isThumb()) {
-        SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
-        return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
-      } else {
-        SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
-                          Reg0 };
-        return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
-      }
+      unsigned Opc = (Subtarget->isThumb() ? ARM::t2RSBrs : ARM::RSBrsi);
+      SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
     }
   }
   break;
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -984,6 +984,7 @@
     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFMA(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue
       LowerFormalArguments(SDValue Chain,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -694,7 +694,7 @@
     setOperationAction(ISD::FCOS, VT, Expand);
     setOperationAction(ISD::FSINCOS, VT, Expand);
     setOperationAction(ISD::FREM, VT, Expand);
-    setOperationAction(ISD::FMA, VT, Expand);
+    setOperationAction(ISD::FMA, VT, Expand);
     setOperationAction(ISD::FPOWI, VT, Expand);
     setOperationAction(ISD::FSQRT, VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
@@ -1114,12 +1114,12 @@
     setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
 
     if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
-      setOperationAction(ISD::FMA, MVT::v8f32, Legal);
-      setOperationAction(ISD::FMA, MVT::v4f64, Legal);
-      setOperationAction(ISD::FMA, MVT::v4f32, Legal);
-      setOperationAction(ISD::FMA, MVT::v2f64, Legal);
-      setOperationAction(ISD::FMA, MVT::f32, Legal);
-      setOperationAction(ISD::FMA, MVT::f64, Legal);
+      setOperationAction(ISD::FMA, MVT::v8f32, Custom);
+      setOperationAction(ISD::FMA, MVT::v4f64, Custom);
+      setOperationAction(ISD::FMA, MVT::v4f32, Custom);
+      setOperationAction(ISD::FMA, MVT::v2f64, Custom);
+      setOperationAction(ISD::FMA, MVT::f32, Custom);
+      setOperationAction(ISD::FMA, MVT::f64, Custom);
     }
 
     if (Subtarget->hasInt256()) {
@@ -1285,8 +1285,8 @@
     setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
     setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
     setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
-    setOperationAction(ISD::FMA, MVT::v8f64, Legal);
-    setOperationAction(ISD::FMA, MVT::v16f32, Legal);
+    setOperationAction(ISD::FMA, MVT::v8f64, Custom);
+    setOperationAction(ISD::FMA, MVT::v16f32, Custom);
 
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
@@ -1588,7 +1588,6 @@
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::FADD);
   setTargetDAGCombine(ISD::FSUB);
-  setTargetDAGCombine(ISD::FMA);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::MLOAD);
@@ -12358,6 +12357,14 @@
     if (User->getOpcode() == ISD::FNEG)
       return Op;
 
+  // If this is an FNEG and it has an FMA user, bail out so the combination
+  // can be lowered into an X86ISD::FMSUB or X86ISD::FNMSUB.
+  bool IsFNEG = (Op.getOpcode() == ISD::FNEG);
+  if (IsFNEG)
+    for (SDNode *User : Op->uses())
+      if (User->getOpcode() == ISD::FMA || User->getOpcode() == ISD::FMAD)
+        return Op;
+
   SDValue Op0 = Op.getOperand(0);
   bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
@@ -17606,6 +17613,45 @@
   return NOOP;
 }
 
+SDValue X86TargetLowering::LowerFMA(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getNode()->getValueType(0);
+
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  EVT ScalarVT = VT.getScalarType();
+  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+      (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+    return SDValue();
+
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+  SDValue C = Op.getOperand(2);
+
+  bool NegA = (A.getOpcode() == ISD::FNEG);
+  bool NegB = (B.getOpcode() == ISD::FNEG);
+  bool NegC = (C.getOpcode() == ISD::FNEG);
+
+  // The multiply is negated when exactly one of A and B is negated.
+  bool NegMul = (NegA != NegB);
+  if (NegA)
+    A = A.getOperand(0);
+  if (NegB)
+    B = B.getOperand(0);
+  if (NegC)
+    C = C.getOperand(0);
+
+  unsigned Opcode;
+  if (!NegMul)
+    Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
+  else
+    Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
+  return DAG.getNode(Opcode, dl, VT, A, B, C);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -17700,6 +17746,7 @@
   case ISD::GC_TRANSITION_START: return LowerGC_TRANSITION_START(Op, DAG);
   case ISD::GC_TRANSITION_END:   return LowerGC_TRANSITION_END(Op, DAG);
+  case ISD::FMA:                 return LowerFMA(Op, DAG);
   }
 }
@@ -23828,46 +23875,6 @@
   return SDValue();
 }
 
-static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget* Subtarget) {
-  SDLoc dl(N);
-  EVT VT = N->getValueType(0);
-
-  // Let legalize expand this if it isn't a legal type yet.
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
-    return SDValue();
-
-  EVT ScalarVT = VT.getScalarType();
-  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
-      (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
-    return SDValue();
-
-  SDValue A = N->getOperand(0);
-  SDValue B = N->getOperand(1);
-  SDValue C = N->getOperand(2);
-
-  bool NegA = (A.getOpcode() == ISD::FNEG);
-  bool NegB = (B.getOpcode() == ISD::FNEG);
-  bool NegC = (C.getOpcode() == ISD::FNEG);
-
-  // Negative multiplication when NegA xor NegB
-  bool NegMul = (NegA != NegB);
-  if (NegA)
-    A = A.getOperand(0);
-  if (NegB)
-    B = B.getOperand(0);
-  if (NegC)
-    C = C.getOperand(0);
-
-  unsigned Opcode;
-  if (!NegMul)
-    Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
-  else
-    Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
-
-  return DAG.getNode(Opcode, dl, VT, A, B, C);
-}
-
 static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
@@ -24479,7 +24486,6 @@
   case X86ISD::VPERMILPI:
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
-  case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
     return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
   case X86ISD::INSERTPS: {
Index: test/CodeGen/AArch64/aarch64_f16_be.ll
===================================================================
--- test/CodeGen/AArch64/aarch64_f16_be.ll
+++ test/CodeGen/AArch64/aarch64_f16_be.ll
@@ -32,7 +32,11 @@
 ; CHECK-NOT: st1
 
 ; CHECK-BE-LABEL: test_bitcast_v8f16_to_fp128:
-; CHECK-BE: st1
+; CHECK-BE: rev64
+; CHECK-BE: ext
+; CHECK-BE: rev64
+; CHECK-BE: ext
+; CHECK-BE: str
 
   %x = alloca fp128, align 16
   %y = bitcast <8 x half> %a to fp128
@@ -58,7 +62,9 @@
 ; CHECK-NOT: st1
 
 ; CHECK-BE-LABEL: test_bitcast_v4f16_to_v1f64:
-; CHECK-BE: st1
+; CHECK-BE: rev64
+; CHECK-BE: rev64
+; CHECK-BE: str
 
   %x = alloca <1 x double>, align 8
   %y = bitcast <4 x half> %a to <1 x double>
Index: test/CodeGen/AArch64/and-mask-removal.ll
===================================================================
--- test/CodeGen/AArch64/and-mask-removal.ll
+++ test/CodeGen/AArch64/and-mask-removal.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel=false -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O1 -fast-isel=false -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 @board = common global [400 x i8] zeroinitializer, align 1
 @next_string = common global i32 0, align 4
Index: test/CodeGen/ARM/Windows/alloca.ll
===================================================================
--- test/CodeGen/ARM/Windows/alloca.ll
+++ test/CodeGen/ARM/Windows/alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
+; RUN: llc -O1 -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
 
 declare arm_aapcs_vfpcc i32 @num_entries()
Index: test/CodeGen/ARM/alloc-no-stack-realign.ll
===================================================================
--- test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O1 | FileCheck %s -check-prefix=NO-REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O1 | FileCheck %s -check-prefix=REALIGN
 
 ; rdar://12713765
 ; When realign-stack is set to false, make sure we are not creating stack
@@ -11,25 +11,27 @@
 ; NO-REALIGN-LABEL: test1
 ; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
 ; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
+
+; NO-REALIGN: add r[[R9:[0-9]+]], r[[R1]], #32
+; NO-REALIGN: add r[[R1]], r[[R1]], #48
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R9]]:128]
 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
 
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
+; NO-REALIGN: add r[[R1:[0-9]+]], r[[R3:[0-9]+]], #48
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; NO-REALIGN: add r[[R1:[0-9]+]], r[[R3:[0-9]+]], #32
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]!
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]
 
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; NO-REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
 ; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
+
   %retval = alloca <16 x float>, align 16
   %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
   store <16 x float> %0, <16 x float>* %retval
@@ -44,19 +46,19 @@
 ; REALIGN: bfc sp, #0, #6
 ; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
 ; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
+; REALIGN: add r[[R9:[0-9]+]], r[[R1]], #32
+; REALIGN: add r[[R1:[0-9]+]], r[[R1]], #48
+; REALIGN: mov r[[R3:[0-9]+]], sp
+; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R9]]:128]
 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: orr r[[R1:[0-9]+]], r[[R3:[0-9]+]], #48
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: orr r[[R1:[0-9]+]], r[[R3]], #32
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]
+; REALIGN: orr r[[R1:[0-9]+]], r[[R3]], #16
 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
 
 ; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
Index: test/CodeGen/ARM/big-endian-ret-f64.ll
===================================================================
--- test/CodeGen/ARM/big-endian-ret-f64.ll
+++ test/CodeGen/ARM/big-endian-ret-f64.ll
@@ -3,8 +3,8 @@
 
 define double @fn() {
 ; CHECK-LABEL: fn
-; CHECK: ldr r0, [sp]
-; CHECK: ldr r1, [sp, #4]
+; CHECK: vldr [[REG:d[0-9]+]], [sp]
+; CHECK: vmov r1, r0, [[REG]]
   %r = alloca double, align 8
   %1 = load double, double* %r, align 8
   ret double %1
Index: test/CodeGen/ARM/vst3.ll
===================================================================
--- test/CodeGen/ARM/vst3.ll
+++ test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -fast-isel=0 -O0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -fast-isel=0 -O1 %s -o - | FileCheck %s
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst3i8:
Index: test/CodeGen/X86/atomic16.ll
===================================================================
--- test/CodeGen/X86/atomic16.ll
+++ test/CodeGen/X86/atomic16.ll
@@ -70,10 +70,10 @@
 ; X32: lock
 ; X32: andw $3
   %t2 = atomicrmw and i16* @sc16, i16 5 acquire
-; X64: andl
+; X64: andw
 ; X64: lock
 ; X64: cmpxchgw
-; X32: andl
+; X32: andw
 ; X32: lock
 ; X32: cmpxchgw
   %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
@@ -95,10 +95,10 @@
 ; X32: lock
 ; X32: orw $3
   %t2 = atomicrmw or i16* @sc16, i16 5 acquire
-; X64: orl
+; X64: orw
 ; X64: lock
 ; X64: cmpxchgw
-; X32: orl
+; X32: orw
 ; X32: lock
 ; X32: cmpxchgw
   %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
@@ -120,10 +120,10 @@
 ; X32: lock
 ; X32: xorw $3
   %t2 = atomicrmw xor i16* @sc16, i16 5 acquire
-; X64: xorl
+; X64: xorw
 ; X64: lock
 ; X64: cmpxchgw
-; X32: xorl
+; X32: xorw
 ; X32: lock
 ; X32: cmpxchgw
   %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
@@ -140,12 +140,12 @@
 ; X64-LABEL: atomic_fetch_nand16
 ; X32-LABEL: atomic_fetch_nand16
   %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
-; X64: andl
-; X64: notl
+; X64: andw
+; X64: notw
 ; X64: lock
 ; X64: cmpxchgw
-; X32: andl
-; X32: notl
+; X32: andw
+; X32: notw
 ; X32: lock
 ; X32: cmpxchgw
   ret void
Index: test/CodeGen/X86/atomic32.ll
===================================================================
--- test/CodeGen/X86/atomic32.ll
+++ test/CodeGen/X86/atomic32.ll
@@ -111,7 +111,9 @@
 ; WITH-CMOV: cmpxchgl
 
 ; NOCMOV: subl
-; NOCMOV: jge
+; NOCMOV: setg [[REG:%[a-z]+]]
+; NOCMOV: testb $1, [[REG]]
+; NOCMOV: jne
 ; NOCMOV: lock
 ; NOCMOV: cmpxchgl
   ret void
@@ -130,7 +132,9 @@
 ; WITH-CMOV: cmpxchgl
 
 ; NOCMOV: subl
-; NOCMOV: jle
+; NOCMOV: setle [[REG:%[a-z]+]]
+; NOCMOV: testb $1, [[REG]]
+; NOCMOV: jne
 ; NOCMOV: lock
 ; NOCMOV: cmpxchgl
   ret void
@@ -149,7 +153,9 @@
 ; WITH-CMOV: cmpxchgl
 
 ; NOCMOV: subl
-; NOCMOV: ja
+; NOCMOV: seta [[REG:%[a-z]+]]
+; NOCMOV: testb $1, [[REG]]
+; NOCMOV: jne
 ; NOCMOV: lock
 ; NOCMOV: cmpxchgl
   ret void
@@ -168,7 +174,9 @@
 ; WITH-CMOV: cmpxchgl
 
 ; NOCMOV: subl
-; NOCMOV: jb
+; NOCMOV: setbe [[REG:%[a-z]+]]
+; NOCMOV: testb $1, [[REG]]
+; NOCMOV: jne
 ; NOCMOV: lock
 ; NOCMOV: cmpxchgl
   ret void
Index: test/CodeGen/X86/atomic6432.ll
===================================================================
--- test/CodeGen/X86/atomic6432.ll
+++ test/CodeGen/X86/atomic6432.ll
@@ -32,18 +32,18 @@
 define void @atomic_fetch_sub64() nounwind {
 ; X32-LABEL: atomic_fetch_sub64:
   %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
-; X32: addl $-1
-; X32: adcl $-1
+; X32: subl $1
+; X32: sbbl $0
 ; X32: lock
 ; X32: cmpxchg8b
   %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
-; X32: addl $-3
-; X32: adcl $-1
+; X32: subl $3
+; X32: sbbl $0
 ; X32: lock
 ; X32: cmpxchg8b
   %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
-; X32: addl $-5
-; X32: adcl $-1
+; X32: subl $5
+; X32: sbbl $0
 ; X32: lock
 ; X32: cmpxchg8b
   %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
Index: test/CodeGen/X86/dag-optnone.ll
===================================================================
--- test/CodeGen/X86/dag-optnone.ll
+++ test/CodeGen/X86/dag-optnone.ll
@@ -1,33 +1,18 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -O0 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -O0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=OPT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -O1 -mattr=+avx | FileCheck %s --check-prefix=OPT
 
 ; Background:
-; If fast-isel bails out to normal selection, then the DAG combiner will run,
-; even at -O0. In principle this should not happen (those are optimizations,
-; and we said -O0) but as a practical matter there are some instruction
-; selection patterns that depend on the legalizations and transforms that the
-; DAG combiner does.
+; If fast-isel bails out to normal selection, then the DAG combiner should not
+; run at -O0.
 ;
 ; The 'optnone' attribute implicitly sets -O0 and fast-isel for the function.
-; The DAG combiner was disabled for 'optnone' (but not -O0) by r221168, then
-; re-enabled in r233153 because of problems with instruction selection patterns
-; mentioned above. (Note: because 'optnone' is supposed to match -O0, r221168
-; really should have disabled the combiner for both.)
 ;
-; If instruction selection eventually becomes smart enough to run without DAG
-; combiner, then the combiner can be turned off for -O0 (not just 'optnone')
-; and this test can go away. (To be replaced by a different test that verifies
-; the DAG combiner does *not* run at -O0 or for 'optnone' functions.)
-;
-; In the meantime, this test wants to make sure the combiner stays enabled for
-; 'optnone' functions, just as it is for -O0.
-
-
-; The test cases @foo[WithOptnone] prove that the same DAG combine happens
-; with -O0 and with 'optnone' set. To prove this, we use a Windows triple to
+; The test cases @foo[WithOptnone] prove that no DAG combine happens with
+; -O0 and with 'optnone' set. To prove this, we use a Windows triple to
 ; cause fast-isel to bail out (because something about the calling convention
 ; is not handled in fast-isel). Then we have a repeated fadd that can be
-; combined into an fmul. We show that this happens in both the non-optnone
-; function and the optnone function.
+; combined into an fmul. We show that this combine does not happen in either
+; the non-optnone function or the optnone function.
 
 define float @foo(float %x) #0 {
 entry:
@@ -37,8 +22,9 @@
 }
 
 ; CHECK-LABEL: @foo
-; CHECK-NOT: add
-; CHECK: mul
+; CHECK-NOT: mul
+; CHECK: add
+; CHECK: add
 ; CHECK-NEXT: ret
 
 define float @fooWithOptnone(float %x) #1 {
@@ -48,10 +34,11 @@
   ret float %add1
 }
 
-; CHECK-LABEL: @fooWithOptnone
-; CHECK-NOT: add
-; CHECK: mul
-; CHECK-NEXT: ret
+; OPT-LABEL: @fooWithOptnone
+; OPT-NOT: mul
+; OPT: add
+; OPT: add
+; OPT-NEXT: ret
 
 
 ; The test case @bar is derived from an instruction selection failure case
Index: test/CodeGen/X86/fast-isel-gep.ll
===================================================================
--- test/CodeGen/X86/fast-isel-gep.ll
+++ test/CodeGen/X86/fast-isel-gep.ll
@@ -67,7 +67,8 @@
   ret double %tmp2
 
 ; X32-LABEL: test4:
-; X32: 128(%e{{.*}},%e{{.*}},8)
+; X32: addl $16, [[REG:%e[a-z]+]]
+; X32: (%e{{.*}},[[REG]],8)
 
 ; X64-LABEL: test4:
 ; X64: 128(%r{{.*}},%r{{.*}},8)
 }
Index: test/CodeGen/X86/fma-no-dag-combine.ll
===================================================================
--- test/CodeGen/X86/fma-no-dag-combine.ll
+++ test/CodeGen/X86/fma-no-dag-combine.ll
@@ -0,0 +1,14 @@
+; Check that an fma intrinsic is properly lowered to a target-specific fma
+; instruction when the DAG combiner is disabled at -O0.
+
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -O0 -mattr=+fma | FileCheck %s
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+
+; CHECK: test_fma_no_combine
+; CHECK: vfmadd213ps
+; CHECK: ret
+define <4 x float> @test_fma_no_combine(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
Index: test/CodeGen/X86/inline-asm-tied.ll
===================================================================
--- test/CodeGen/X86/inline-asm-tied.ll
+++ test/CodeGen/X86/inline-asm-tied.ll
@@ -1,8 +1,9 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic -no-integrated-as | FileCheck %s
 ; rdar://6992609
 
-; CHECK: movl [[EDX:%e..]], 4(%esp)
-; CHECK: movl [[EDX]], 4(%esp)
+; CHECK: movl [[REG1:%e..]], 4(%esp)
+; CHECK: movl 4(%esp), [[REG2:%e..]]
+; CHECK: movl [[REG2]], 4(%esp)
 target triple = "i386-apple-darwin9.0"
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
Index: test/CodeGen/X86/musttail.ll
===================================================================
--- test/CodeGen/X86/musttail.ll
+++ test/CodeGen/X86/musttail.ll
@@ -45,7 +45,8 @@
 define i32 @t4({}* %fn, i32 %n, i32 %r) {
 ; CHECK-LABEL: t4:
 ; CHECK: incl %[[r:.*]]
-; CHECK: decl %[[n:.*]]
+; CHECK: {{decl|subl}}
+; CHECK-SAME: %[[n:.*]]
 ; CHECK: movl %[[r]], {{[0-9]+}}(%esp)
 ; CHECK: movl %[[n]], {{[0-9]+}}(%esp)
 ; CHECK: jmpl *%{{.*}}
@@ -69,7 +70,8 @@
 ; CHECK: movl %esp, %esi
 
 ; Modify the args.
 ; CHECK: incl %[[r:.*]]
-; CHECK: decl %[[n:.*]]
+; CHECK: {{decl|subl}}
+; CHECK-SAME: %[[n:.*]]
 
 ; Store them through ebp, since that's the only stable arg pointer.
 ; CHECK: movl %[[r]], {{[0-9]+}}(%ebp)
 ; CHECK: movl %[[n]], {{[0-9]+}}(%ebp)
Index: test/CodeGen/X86/switch.ll
===================================================================
--- test/CodeGen/X86/switch.ll
+++ test/CodeGen/X86/switch.ll
@@ -19,13 +19,21 @@
 ; Should be lowered as straight compares in -O0 mode.
 ; NOOPT-LABEL: basic
 ; NOOPT: subl $1, %eax
-; NOOPT: je
+; NOOPT: sete [[R1:%.+]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
 ; NOOPT: subl $3, %eax
-; NOOPT: je
+; NOOPT: sete [[R1]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
 ; NOOPT: subl $4, %eax
-; NOOPT: je
+; NOOPT: sete [[R1]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
 ; NOOPT: subl $5, %eax
-; NOOPT: je
+; NOOPT: sete [[R1]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
 
 ; Jump table otherwise.
 ; CHECK-LABEL: basic
@@ -62,11 +70,15 @@
 
 ; We do this even at -O0, because it's cheap and makes codegen faster.
 ; NOOPT-LABEL: simple_ranges
-; NOOPT: subl $4
-; NOOPT: jb
-; NOOPT: addl $-100
-; NOOPT: subl $4
-; NOOPT: jb
+; NOOPT: subl $3
+; NOOPT: setbe [[R1:%.+]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
+; NOOPT: subl $100
+; NOOPT: subl $3
+; NOOPT: setbe [[R1]]
+; NOOPT: testb $1, [[R1]]
+; NOOPT: jne
 }
Index: test/CodeGen/X86/win32_sret.ll
===================================================================
--- test/CodeGen/X86/win32_sret.ll
+++ test/CodeGen/X86/win32_sret.ll
@@ -1,10 +1,10 @@
 ; We specify -mcpu explicitly to avoid instruction reordering that happens on
 ; some setups (e.g., Atom) from affecting the output.
-; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-NO-O0
 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
 ; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-O0
 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
@@ -116,11 +116,11 @@
 ; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
 
 ; The address of the return structure is passed as an implicit parameter.
-; In the -O0 build, %eax is spilled at the beginning of the function, hence we
-; should match both 4(%esp) and 8(%esp).
-; WIN32: {{[48]}}(%esp), %eax
-; WIN32: movl $42, (%eax)
-; WIN32: retl $4
+; WIN32-NO-O0: {{[48]}}(%esp), %eax
+; WIN32-O0: subl ${{[0-9]+}}, %esp
+; WIN32-O0: {{[0-9]+}}(%esp), %eax
+; WIN32: movl $42, (%eax)
+; WIN32: retl $4
 }
 
 define void @call_foo5() {
Index: test/CodeGen/X86/win64_eh.ll
===================================================================
--- test/CodeGen/X86/win64_eh.ll
+++ test/CodeGen/X86/win64_eh.ll
@@ -48,7 +48,7 @@
 
 ; Checks stack push
 
-define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
+define i32 @foo3(i32 %g_arg, i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
 entry:
   %a = alloca i32
   %b = alloca i32
@@ -56,12 +56,14 @@
   %d = alloca i32
   %e = alloca i32
   %f = alloca i32
+  %g = alloca i32
   store i32 %a_arg, i32* %a
   store i32 %b_arg, i32* %b
   store i32 %c_arg, i32* %c
   store i32 %d_arg, i32* %d
   store i32 %e_arg, i32* %e
   store i32 %f_arg, i32* %f
+  store i32 %g_arg, i32* %g
   %tmp = load i32, i32* %a
   %tmp1 = mul i32 %tmp, 2
   %tmp2 = load i32, i32* %b
@@ -71,7 +73,7 @@
   %tmp6 = mul i32 %tmp5, 5
   %tmp7 = add i32 %tmp4, %tmp6
   %tmp8 = load i32, i32* %d
-  %tmp9 = mul i32 %tmp8, 7
+  %tmp9 = mul i32 %tmp8, 9
   %tmp10 = add i32 %tmp7, %tmp9
   %tmp11 = load i32, i32* %e
   %tmp12 = mul i32 %tmp11, 11
@@ -85,11 +87,11 @@
 ; WIN64: .seh_proc foo3
 ; WIN64: pushq %rsi
 ; WIN64: .seh_pushreg 6
-; NORM: subq $24, %rsp
-; ATOM: leaq -24(%rsp), %rsp
-; WIN64: .seh_stackalloc 24
+; NORM: subq $32, %rsp
+; ATOM: leaq -32(%rsp), %rsp
+; WIN64: .seh_stackalloc 32
 ; WIN64: .seh_endprologue
-; WIN64: addq $24, %rsp
+; WIN64: addq $32, %rsp
 ; WIN64: popq %rsi
 ; WIN64: ret
 ; WIN64: .seh_endproc
Index: test/CodeGen/XCore/threads.ll
===================================================================
--- test/CodeGen/XCore/threads.ll
+++ test/CodeGen/XCore/threads.ll
@@ -123,7 +123,9 @@
 define void @phiNode2( i1 %bool) {
 ; N.B. check an extra 'Node_crit_edge' (LBB12_1) is inserted
 ; PHINODE-LABEL: phiNode2:
-; PHINODE: bf {{r[0-9]}}, .LBB12_3
+; PHINODE: mkmsk [[MASK_REG:r[0-9]+]], 1
+; PHINODE: xor [[REG:r[0-9]+]], [[REG]], [[MASK_REG]]
+; PHINODE: bt [[REG]], .LBB12_3
 ; PHINODE: bu .LBB12_1
 ; PHINODE-LABEL: .LBB12_1:
 ; PHINODE: get r11, id
Index: test/DebugInfo/ARM/line.test
===================================================================
--- test/DebugInfo/ARM/line.test
+++ test/DebugInfo/ARM/line.test
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-none-linux -O0 -filetype=asm < %S/../Inputs/line.ll | FileCheck %S/../Inputs/line.ll
+; RUN: llc -mtriple=arm-none-linux -O1 -filetype=asm < %S/../Inputs/line.ll | FileCheck %S/../Inputs/line.ll
 
 ; This is more complex than it looked. It's mixed up somewhere in SelectionDAG
 ; (legalized as br_cc, losing the separation between the comparison and the
Index: test/DebugInfo/X86/op_deref.ll
===================================================================
--- test/DebugInfo/X86/op_deref.ll
+++ test/DebugInfo/X86/op_deref.ll
@@ -20,8 +20,8 @@
 ; right now, so we check the asm output:
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
 ; vla should have a register-indirect address at one point.
-; ASM-CHECK: DEBUG_VALUE: vla <- RCX
-; ASM-CHECK: DW_OP_breg2
+; ASM-CHECK: DEBUG_VALUE: vla <- RDX
+; ASM-CHECK: DW_OP_breg1
 
 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT
 ; PRETTY-PRINT: DIExpression(DW_OP_deref, DW_OP_deref)
Index: test/DebugInfo/X86/vla.ll
===================================================================
--- test/DebugInfo/X86/vla.ll
+++ test/DebugInfo/X86/vla.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s
 ; Ensure that we generate an indirect location for the variable length array a.
-; CHECK: ##DEBUG_VALUE: vla:a <- RDX
-; CHECK: DW_OP_breg1
+; CHECK: ##DEBUG_VALUE: vla:a <- RSI
+; CHECK: DW_OP_breg4
 ; rdar://problem/13658587
 ;
 ; generated from:
Index: test/tools/llvm-symbolizer/ppc64.test
===================================================================
--- test/tools/llvm-symbolizer/ppc64.test
+++ test/tools/llvm-symbolizer/ppc64.test
@@ -4,7 +4,7 @@
 int bar() { return foo(); }
 int _start() { return bar(); }
 
-RUN: %python -c "print('0x1000014c\n0x1000018c\n0x100001cc')" | llvm-symbolizer -obj=%p/Inputs/ppc64 | FileCheck %s
+RUN: "%python" -c "print('0x1000014c\n0x1000018c\n0x100001cc')" | llvm-symbolizer -obj=%p/Inputs/ppc64 | FileCheck %s
 CHECK: foo
 CHECK: bar