Index: lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- lib/Transforms/Scalar/Reassociate.cpp
+++ lib/Transforms/Scalar/Reassociate.cpp
@@ -19,7 +19,6 @@
 // than values not in loops.
 //
 //===----------------------------------------------------------------------===//
-
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -207,7 +206,7 @@
             I->getOpcode() == Instruction::And)) {
     Value *V0 = I->getOperand(0);
     Value *V1 = I->getOperand(1);
-    if (isa<ConstantInt>(V0))
+    if (isa<Constant>(V0))
       std::swap(V0, V1);
 
     if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
@@ -216,11 +215,21 @@
       isOr = (I->getOpcode() == Instruction::Or);
       return;
     }
+    if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V1))
+      if (Constant *Splat = C->getSplatValue())
+        if (ConstantInt *C = dyn_cast<ConstantInt>(Splat)) {
+          ConstPart = C->getValue();
+          SymbolicPart = V0;
+          isOr = (I->getOpcode() == Instruction::Or);
+          return;
+        }
   }
 
-  // view the operand as "V | 0"
+  // View the operand as "V | 0"
   SymbolicPart = V;
-  ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+  Type *Ty = V->getType();
+  Ty = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty;
+  ConstPart = APInt::getNullValue(Ty->getIntegerBitWidth());
   isOr = true;
 }
 
@@ -228,7 +237,7 @@
 INITIALIZE_PASS(Reassociate, "reassociate",
                 "Reassociate expressions", false, false)
 
-// Public interface to the Reassociate pass
+// Public interface to the Reassociate pass.
 FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
 
 /// isReassociableOp - Return true if V is an instruction of the specified
@@ -273,7 +282,7 @@
 void Reassociate::BuildRankMap(Function &F) {
   unsigned i = 2;
 
-  // Assign distinct ranks to function arguments
+  // Assign distinct ranks to function arguments.
   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
     ValueRankMap[&*I] = ++i;
 
@@ -313,21 +322,17 @@
 
   // If this is a not or neg instruction, do not count it for rank.  This
   // assures us that X and ~X will have the same rank.
-  Type *Ty = V->getType();
-  if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
-      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
-       !BinaryOperator::isFNeg(I)))
+  if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+      !BinaryOperator::isFNeg(I))
     ++Rank;
 
-  //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
-  //     << Rank << "\n");
-
+  DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
   return ValueRankMap[I] = Rank;
 }
 
 static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
                                  Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
     return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
   else {
     BinaryOperator *Res =
@@ -339,7 +344,7 @@
 
 static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
                                  Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
     return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
   else {
     BinaryOperator *Res =
@@ -351,7 +356,7 @@
 
 static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
                                  Instruction *InsertBefore, Value *FlagsOp) {
-  if (S1->getType()->isIntegerTy())
+  if (S1->getType()->isIntOrIntVectorTy())
     return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
   else {
     BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -361,11 +366,10 @@
 }
 
 /// LowerNegateToMultiply - Replace 0-X with X*-1.
-///
 static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
   Type *Ty = Neg->getType();
-  Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
-                                       : ConstantFP::get(Ty, -1.0);
+  Constant *NegOne = Ty->isIntOrIntVectorTy() ?
+    ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
 
   BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
   Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -846,7 +850,7 @@
       Constant *Undef = UndefValue::get(I->getType());
       NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
                                      Undef, Undef, "", I);
-      if (NewOp->getType()->isFloatingPointTy())
+      if (NewOp->getType()->isFPOrFPVectorTy())
         NewOp->setFastMathFlags(I->getFastMathFlags());
     } else {
       NewOp = NodesToRewrite.pop_back_val();
@@ -891,9 +895,17 @@
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
 static Value *NegateValue(Value *V, Instruction *BI) {
+  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V)) {
+    if (Constant *Splat = C->getSplatValue()) {
+      if (isa<ConstantFP>(Splat))
+        return ConstantExpr::getFNeg(C);
+      if (isa<ConstantInt>(Splat))
+        return ConstantExpr::getNeg(C);
+    }
+  }
   if (ConstantFP *C = dyn_cast<ConstantFP>(V))
     return ConstantExpr::getFNeg(C);
-  if (Constant *C = dyn_cast<Constant>(V))
+  if (Constant *C = dyn_cast<ConstantInt>(V))
     return ConstantExpr::getNeg(C);
 
   // We are trying to expose opportunity for reassociation.  One of the things
@@ -905,8 +917,7 @@
   // the constants.  We assume that instcombine will clean up the mess later if
   // we introduce tons of unnecessary negation instructions.
   //
-  if (BinaryOperator *I =
-          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
+  if (BinaryOperator *I = isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
     I->setOperand(0, NegateValue(I->getOperand(0), BI));
     I->setOperand(1, NegateValue(I->getOperand(1), BI));
@@ -1012,7 +1023,11 @@
 /// by one, change this into a multiply by a constant to assist with further
 /// reassociation.
 static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
-  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+  Type *Ty = Shl->getType();
+  Type *EltTy = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty;
+  Constant *MulCst = ConstantInt::get(EltTy, 1);
+  if (Ty->isVectorTy())
+    MulCst = ConstantDataVector::getSplat(Ty->getVectorNumElements(), MulCst);
   MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
 
   BinaryOperator *Mul =
@@ -1086,7 +1101,7 @@
       if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
         if (FC1->getValue() == -FC2->getValue()) {
           FoundFactor = NeedsNegate = true;
-          Factors.erase(Factors.begin()+i);
+          Factors.erase(Factors.begin() + i);
           break;
         }
     } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
@@ -1100,6 +1115,21 @@
           break;
         }
       }
+    } else if (ConstantDataVector *FFC1 = dyn_cast<ConstantDataVector>(Factor)) {
+      if (ConstantDataVector *FFC2 = dyn_cast<ConstantDataVector>(Factors[i].Op))
+        if (Constant *Splat1 = FFC1->getSplatValue())
+          if (Constant *Splat2 = FFC2->getSplatValue())
+            if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Splat1))
+              if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Splat2)) {
+                APFloat F1(FC1->getValueAPF());
+                APFloat F2(FC2->getValueAPF());
+                F2.changeSign();
+                if (F1.compare(F2) == APFloat::cmpEqual) {
+                  FoundFactor = NeedsNegate = true;
+                  Factors.erase(Factors.begin() + i);
+                  break;
+                }
+              }
     }
   }
 
@@ -1203,10 +1233,13 @@
                              const APInt &ConstOpnd) {
   if (ConstOpnd != 0) {
     if (!ConstOpnd.isAllOnesValue()) {
-      LLVMContext &Ctx = Opnd->getType()->getContext();
+      Type *Ty = Opnd->getType();
+      LLVMContext &Ctx = Ty->getContext();
       Instruction *I;
-      I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
-                                    "and.ra", InsertBefore);
+      Constant *C = ConstantInt::get(Ctx, ConstOpnd);
+      if (Ty->isVectorTy())
+        C = ConstantDataVector::getSplat(Ty->getVectorNumElements(), C);
+      I = BinaryOperator::CreateAnd(Opnd, C, "and.ra", InsertBefore);
       I->setDebugLoc(InsertBefore->getDebugLoc());
       return I;
     }
@@ -1343,6 +1376,7 @@
   SmallVector<XorOpnd, 8> Opnds;
   SmallVector<XorOpnd*, 8> OpndPtrs;
   Type *Ty = Ops[0].Op->getType();
+  Ty = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty;
   APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
 
   // Step 1: Convert ValueEntry to XorOpnd
@@ -1421,8 +1455,11 @@
       ValueEntry VE(getRank(O.getValue()), O.getValue());
       Ops.push_back(VE);
     }
+    Type *ITy = I->getType();
     if (ConstOpnd != 0) {
-      Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+      Constant *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+      if (ITy->isVectorTy())
+        C = ConstantDataVector::getSplat(ITy->getVectorNumElements(), C);
       ValueEntry VE(getRank(C), C);
       Ops.push_back(VE);
     }
@@ -1431,7 +1468,10 @@
       return Ops.back().Op;
     else if (Sz == 0) {
       assert(ConstOpnd == 0);
-      return ConstantInt::get(Ty->getContext(), ConstOpnd);
+      Constant *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+      if (ITy->isVectorTy())
+        C = ConstantDataVector::getSplat(ITy->getVectorNumElements(), C);
+      return C;
     }
   }
 
@@ -1466,8 +1506,8 @@
 
       // Insert a new multiply.
       Type *Ty = TheOp->getType();
-      Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
-                                      : ConstantFP::get(Ty, NumFound);
+      Constant *C = Ty->isIntOrIntVectorTy() ?
+        ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
       Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
 
       // Now that we have inserted a multiply, optimize it. This allows us to
@@ -1543,8 +1583,7 @@
   unsigned MaxOcc = 0;
   Value *MaxOccVal = nullptr;
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-    BinaryOperator *BOp =
-        isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
+    BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
     if (!BOp)
       continue;
 
@@ -1593,6 +1632,20 @@
             MaxOccVal = Factor;
           }
         }
+      } else if (ConstantDataVector *VC1 = dyn_cast<ConstantDataVector>(Factor)) {
+        if (Constant *Splat = VC1->getSplatValue()) {
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
+            if (CI->isNegative() && !CI->isMinValue(true)) {
+              Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+              assert(!Duplicates.count(Factor) &&
+                     "Shouldn't have two constant factors, missed a canonicalize");
+              unsigned Occ = ++FactorOccurrences[Factor];
+              if (Occ > MaxOcc) {
+                MaxOcc = Occ;
+                MaxOccVal = Factor;
+              }
+            }
+        }
       }
     }
   }
@@ -1606,16 +1659,15 @@
     // this, we could otherwise run into situations where removing a factor
     // from an expression will drop a use of maxocc, and this can cause
     // RemoveFactorFromExpression on successive values to behave differently.
-    Instruction *DummyInst =
-        I->getType()->isIntegerTy()
-            ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
-            : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
+    Instruction *DummyInst = I->getType()->isIntOrIntVectorTy() ?
+      BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) :
+      BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
 
     SmallVector<WeakVH, 4> NewMulOps;
     for (unsigned i = 0; i != Ops.size(); ++i) {
       // Only try to remove factors from expressions we're allowed to.
-      BinaryOperator *BOp =
-          isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
+      BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul,
+                                             Instruction::FMul);
       if (!BOp)
         continue;
 
@@ -1738,7 +1790,7 @@
 
   Value *LHS = Ops.pop_back_val();
   do {
-    if (LHS->getType()->isIntegerTy())
+    if (LHS->getType()->isIntOrIntVectorTy())
       LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
     else
       LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -1921,48 +1973,57 @@
   if (!isa<BinaryOperator>(I))
     return;
 
-  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
-    // If an operand of this shift is a reassociable multiply, or if the shift
-    // is used by a reassociable multiply or add, turn into a multiply.
-    if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
-        (I->hasOneUse() &&
-         (isReassociableOp(I->user_back(), Instruction::Mul) ||
-          isReassociableOp(I->user_back(), Instruction::Add)))) {
-      Instruction *NI = ConvertShiftToMul(I);
-      RedoInsts.insert(I);
-      MadeChange = true;
-      I = NI;
+  // Commute binary operators, to canonicalize the order of their operands.
+  // This can potentially expose more CSE opportunities, and makes writing other
+  // transformations simpler.
+  if (I->isCommutative()) {
+    Value *LHS = I->getOperand(0);
+    Value *RHS = I->getOperand(1);
+    unsigned LHSRank = getRank(LHS);
+    unsigned RHSRank = getRank(RHS);
+
+    // Sort the operands by rank.
+    if (RHSRank < LHSRank && !isa<Constant>(RHS)) {
+      I->setOperand(0, RHS);
+      I->setOperand(1, LHS);
     }
+  }
 
-  // Commute floating point binary operators, to canonicalize the order of their
-  // operands.  This can potentially expose more CSE opportunities, and makes
-  // writing other transformations simpler.
-  if (I->getType()->isFloatingPointTy() || I->getType()->isVectorTy()) {
-
-    // FAdd and FMul can be commuted.
-    if (I->getOpcode() == Instruction::FMul ||
-        I->getOpcode() == Instruction::FAdd) {
-      Value *LHS = I->getOperand(0);
-      Value *RHS = I->getOperand(1);
-      unsigned LHSRank = getRank(LHS);
-      unsigned RHSRank = getRank(RHS);
-
-      // Sort the operands by rank.
-      if (RHSRank < LHSRank) {
-        I->setOperand(0, RHS);
-        I->setOperand(1, LHS);
+  if (I->getOpcode() == Instruction::Shl) {
+    if (isa<ConstantInt>(I->getOperand(1)))
+      // If an operand of this shift is a reassociable multiply, or if the shift
+      // is used by a reassociable multiply or add, turn into a multiply.
+      if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
+          (I->hasOneUse() &&
+           (isReassociableOp(I->user_back(), Instruction::Mul) ||
+            isReassociableOp(I->user_back(), Instruction::Add)))) {
+        Instruction *NI = ConvertShiftToMul(I);
+        RedoInsts.insert(I);
+        MadeChange = true;
+        I = NI;
+      }
+    if (ConstantDataVector *VC1 = dyn_cast<ConstantDataVector>(I->getOperand(1)))
+      if (Constant *Splat = VC1->getSplatValue())
+        if (isa<ConstantInt>(Splat)) {
+        // If an operand of this shift is a reassociable multiply, or if the shift
+        // is used by a reassociable multiply or add, turn into a multiply.
+        if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
+            (I->hasOneUse() &&
+             (isReassociableOp(I->user_back(), Instruction::Mul) ||
+              isReassociableOp(I->user_back(), Instruction::Add)))) {
+          Instruction *NI = ConvertShiftToMul(I);
+          RedoInsts.insert(I);
+          MadeChange = true;
+          I = NI;
+        }
       }
-    }
-
-    // FIXME: We should commute vector instructions as well.  However, this 
-    // requires further analysis to determine the effect on later passes.
-
-    // Don't try to optimize vector instructions or anything that doesn't have
-    // unsafe algebra.
-    if (I->getType()->isVectorTy() || !I->hasUnsafeAlgebra())
-      return;
   }
 
+  // Don't try to optimize floating point binary operators that don't have
+  // unsafe algebra.
+  if (isa<FPMathOperator>(I) && !I->hasUnsafeAlgebra())
+    return;
+
   // Do not reassociate boolean (i1) expressions.  We want to preserve the
   // original order of evaluation for short-circuited comparisons that
   // SimplifyCFG has folded to AND/OR expressions.  If the expression
@@ -2035,9 +2096,6 @@
 }
 
 void Reassociate::ReassociateExpression(BinaryOperator *I) {
-  assert(!I->getType()->isVectorTy() &&
-         "Reassociation of vector instructions is not supported.");
-
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.
   SmallVector<RepeatedValue, 8> Tree;
@@ -2083,18 +2141,33 @@
   // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
   if (I->hasOneUse()) {
     if (I->getOpcode() == Instruction::Mul &&
-        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
-        isa<ConstantInt>(Ops.back().Op) &&
-        cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
-      ValueEntry Tmp = Ops.pop_back_val();
-      Ops.insert(Ops.begin(), Tmp);
+        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add) {
+      if (isa<ConstantInt>(Ops.back().Op) &&
+          cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+        ValueEntry Tmp = Ops.pop_back_val();
+        Ops.insert(Ops.begin(), Tmp);
+      }
+      if (ConstantDataVector *VC1 = dyn_cast<ConstantDataVector>(Ops.back().Op))
+        if (Constant *Splat = VC1->getSplatValue())
+          if (isa<ConstantInt>(Splat) &&
+              cast<ConstantInt>(Splat)->isAllOnesValue()) {
+            ValueEntry Tmp = Ops.pop_back_val();
+            Ops.insert(Ops.begin(), Tmp);
+          }
     } else if (I->getOpcode() == Instruction::FMul &&
-               cast<Instruction>(I->user_back())->getOpcode() ==
-                   Instruction::FAdd &&
-               isa<ConstantFP>(Ops.back().Op) &&
-               cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
-      ValueEntry Tmp = Ops.pop_back_val();
-      Ops.insert(Ops.begin(), Tmp);
+               cast<Instruction>(I->user_back())->getOpcode() == Instruction::FAdd) {
+      if (isa<ConstantFP>(Ops.back().Op) &&
+          cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
+        ValueEntry Tmp = Ops.pop_back_val();
+        Ops.insert(Ops.begin(), Tmp);
+      }
+      if (ConstantDataVector *VC1 = dyn_cast<ConstantDataVector>(Ops.back().Op))
+        if (Constant *Splat = VC1->getSplatValue())
+          if (isa<ConstantFP>(Splat) &&
+              cast<ConstantFP>(Splat)->isExactlyValue(-1.0)) {
+            ValueEntry Tmp = Ops.pop_back_val();
+            Ops.insert(Ops.begin(), Tmp);
+          }
     }
   }
 
Index: test/Transforms/Reassociate/commute.ll
===================================================================
--- /dev/null
+++ test/Transforms/Reassociate/commute.ll
@@ -0,0 +1,53 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+; Commute integer operations.
+define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: %tmp1 = add i32 %y, %x
+; CHECK-NEXT: %tmp2 = mul i32 %y, %x
+; CHECK-NEXT: %tmp3 = add i32 %tmp1, %tmp2
+
+  %tmp1 = add i32 %x, %y
+  %tmp2 = mul i32 %y, %x
+  %tmp3 = add i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+; Commute float operations.
+define float @test2(float %x, float %y) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %tmp1 = fadd float %x, %y
+; CHECK-NEXT: %tmp2 = fmul float %x, %y
+; CHECK-NEXT: %tmp3 = fadd float %tmp1, %tmp2
+
+  %tmp1 = fadd float %x, %y
+  %tmp2 = fmul float %y, %x
+  %tmp3 = fadd float %tmp1, %tmp2
+  ret float %tmp3
+}
+
+; Commute integer vector operations.
+define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %tmp1 = add <2 x i32> %y, %x
+; CHECK-NEXT: %tmp2 = mul <2 x i32> %y, %x
+; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = add <2 x i32> %x, %y
+  %tmp2 = mul <2 x i32> %y, %x
+  %tmp3 = add <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+; Commute float vector operations.
+define <2 x float> @test4(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: %tmp1 = fadd <2 x float> %x, %y
+; CHECK-NEXT: %tmp2 = fmul <2 x float> %x, %y
+; CHECK-NEXT: %tmp3 = fadd <2 x float> %tmp1, %tmp2
+
+  %tmp1 = fadd <2 x float> %x, %y
+  %tmp2 = fmul <2 x float> %y, %x
+  %tmp3 = fadd <2 x float> %tmp1, %tmp2
+  ret <2 x float> %tmp3
+}
Index: test/Transforms/Reassociate/fast-AgressiveSubMove.ll
===================================================================
--- test/Transforms/Reassociate/fast-AgressiveSubMove.ll
+++ test/Transforms/Reassociate/fast-AgressiveSubMove.ll
@@ -2,8 +2,8 @@
 
 define float @test1(float %A) {
 ; CHECK-LABEL: test1
-; CHECK-NEXT: %X = fadd float 1.000000e+00, %A
-; CHECK-NEXT: %Y = fadd float 1.000000e+00, %A
+; CHECK-NEXT: %X = fadd float %A, 1.000000e+00
+; CHECK-NEXT: %Y = fadd float %A, 1.000000e+00
 ; CHECK-NEXT: %r = fsub float %X, %Y
 ; CHECK-NEXT: ret float %r
 
@@ -17,8 +17,8 @@
 ; CHECK-LABEL: test2
 ; CHECK-NEXT: ret float 0.000000e+00
 
-  %X = fadd fast float 1.000000e+00, %A
-  %Y = fadd fast float 1.000000e+00, %A
+  %X = fadd fast float %A, 1.000000e+00
+  %Y = fadd fast float %A, 1.000000e+00
   %r = fsub fast float %X, %Y
   ret float %r
 }
Index: test/Transforms/Reassociate/fast-ReassociateVector.ll
===================================================================
--- test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -1,22 +1,19 @@
-; RUN: opt < %s -reassociate -S | FileCheck %s
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
 
-; Don't handle floating point vector operations.
 define <4 x float> @test1() {
 ; CHECK-LABEL: test1
-; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
-; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
+; CHECK-NEXT: ret <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
 
   %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
   %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
   ret <4 x float> %tmp2
 }
 
-; We don't currently commute integer vector operations.
 define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: test2
-; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = add <2 x i32> %y, %x
-; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
+; CHECK-NEXT: add <2 x i32> %x, %y
+; CHECK-NEXT: shl <2 x i32> %factor12, <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32>
 
   %tmp1 = add <2 x i32> %x, %y
   %tmp2 = add <2 x i32> %y, %x
Index: test/Transforms/Reassociate/fast-fp-commute.ll
===================================================================
--- test/Transforms/Reassociate/fast-fp-commute.ll
+++ test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,7 +33,7 @@
 
 define float @test3(float %x, float %y) {
 ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float 2.000000e+00, %y
+; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
 ; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
 ; CHECK-NEXT: ret float %tmp1
 
Index: test/Transforms/Reassociate/fp-commute.ll
===================================================================
--- test/Transforms/Reassociate/fp-commute.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt -reassociate -S < %s | FileCheck %s
-
-declare void @use(float)
-
-define void @test1(float %x, float %y) {
-; CHECK-LABEL: test1
-; CHECK: fmul float %x, %y
-; CHECK: fmul float %x, %y
-; CHECK: fsub float %1, %2
-; CHECK: call void @use(float %{{.*}})
-; CHECK: call void @use(float %{{.*}})
-
-  %1 = fmul float %x, %y
-  %2 = fmul float %y, %x
-  %3 = fsub float %1, %2
-  call void @use(float %1)
-  call void @use(float %3)
-  ret void
-}
Index: test/Transforms/Reassociate/multistep.ll
===================================================================
--- test/Transforms/Reassociate/multistep.ll
+++ test/Transforms/Reassociate/multistep.ll
@@ -9,8 +9,8 @@
   %t3 = mul i64 %a, %t2 ; a*(a*c)
   %t4 = add i64 %t1, %t3
 ; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: mul i64 %a, %tmp2
+; CHECK-NEXT: mul i64 %tmp3, %a
 ; CHECK-NEXT: ret
   ret i64 %t4
 }
@@ -23,8 +23,8 @@
   %t2 = add i64 %t1, %d ; a*c+d
   %t3 = add i64 %t0, %t2 ; a*b+(a*c+d)
 ; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
-; CHECK-NEXT: add i64 %tmp{{.*}}, %d
+; CHECK-NEXT: mul i64 %tmp, %a
+; CHECK-NEXT: add i64 %tmp1, %d
 ; CHECK-NEXT: ret
   ret i64 %t3
 }
Index: test/Transforms/Reassociate/vector-basictests.ll
===================================================================
--- /dev/null
+++ test/Transforms/Reassociate/vector-basictests.ll
@@ -0,0 +1,395 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s --check-prefix=R-IC
+; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s --check-prefix=R-GVN-IC
+
+; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+define <2 x float> @test1(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: fadd fast <2 x float> %c, %b
+; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
+; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: ret <2 x float>
+
+  %t0 = fmul fast <2 x float> %a, %b
+  %t1 = fmul fast <2 x float> %a, %t0
+  %t2 = fmul fast <2 x float> %a, %c
+  %t3 = fmul fast <2 x float> %a, %t2
+  %t4 = fadd fast <2 x float> %t1, %t3
+  ret <2 x float> %t4
+}
+
+; Check that a*b+a*c+d is turned into a*(b+c)+d.
+define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fadd fast <2 x float> %c, %b
+; CHECK-NEXT: fmul fast <2 x float> %tmp, %a
+; CHECK-NEXT: fadd fast <2 x float> %tmp1, %d
+; CHECK-NEXT: ret <2 x float>
+
+  %t0 = fmul fast <2 x float> %a, %b
+  %t1 = fmul fast <2 x float> %a, %c
+  %t2 = fadd fast <2 x float> %t1, %d
+  %t3 = fadd fast <2 x float> %t0, %t2
+  ret <2 x float> %t3
+}
+
+; No fast-math.
+define <2 x float> @test3(<2 x float> %A) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %X = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: %Y = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: %r = fsub <2 x float> %X, %Y
+; CHECK-NEXT: ret <2 x float> %r
+
+  %X = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
+  %Y = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
+  %R = fsub <2 x float> %X, %Y
+  ret <2 x float> %R
+}
+
+; FIXME: (A + 1) - (A + 1) -> 0
+define <2 x float> @test4(<2 x float> %A) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: %A.neg1 = fsub fast <2 x float> <float -0.000000e+00, float -0.000000e+00>, %A
+; CHECK-NEXT: %A.neg = fadd fast <2 x float> <float -0.000000e+00, float -0.000000e+00>, %A.neg1
+; CHECK-NEXT: %X = fadd fast <2 x float> %A, zeroinitializer
+; CHECK-NEXT: %R = fadd fast <2 x float> %X, %A.neg
+; CHECK-NEXT: ret <2 x float> %R
+
+  %X = fadd fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %A
+  %Y = fadd fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %A
+  %R = fsub fast <2 x float> %X, %Y
+  ret <2 x float> %R
+}
+
+; Should be able to eliminate everything.
+define <2 x float> @test5(<2 x float> %a0, <2 x float> %a1, <2 x float> %a2, <2 x float> %a3, <2 x float> %a4) {
+; R-IC-LABEL: @test5
+; R-IC: ret <2 x float> <float 0.000000e+00, float 0.000000e+00>
+
+  %tmp.2 = fadd fast <2 x float> %a4, %a3
+  %tmp.4 = fadd fast <2 x float> %tmp.2, %a2
+  %tmp.6 = fadd fast <2 x float> %tmp.4, %a1
+  %tmp.8 = fadd fast <2 x float> %tmp.6, %a0
+  %tmp.11 = fadd fast <2 x float> %a3, %a2
+  %tmp.13 = fadd fast <2 x float> %tmp.11, %a1
+  %tmp.15 = fadd fast <2 x float> %tmp.13, %a0
+  %tmp.18 = fadd fast <2 x float> %a2, %a1
+  %tmp.20 = fadd fast <2 x float> %tmp.18, %a0
+  %tmp.23 = fadd fast <2 x float> %a1, %a0
+  %tmp.26 = fsub fast <2 x float> %tmp.8, %tmp.15
+  %tmp.28 = fadd fast <2 x float> %tmp.26, %tmp.20
+  %tmp.30 = fsub fast <2 x float> %tmp.28, %tmp.23
+  %tmp.32 = fsub fast <2 x float> %tmp.30, %a4
+  %tmp.34 = fsub fast <2 x float> %tmp.32, %a2
+  %T = fmul fast <2 x float> %tmp.34, %tmp.34
+  ret <2 x float> %T
+}
+
+; With reassociation, constant folding can eliminate the 12 and -12 constants.
+define <2 x float> @test6(<2 x float> %arg) {
+; R-GVN-IC-LABEL: @test6
+; R-GVN-IC-NEXT: fsub fast <2 x float> <float -0.000000e+00, float -0.000000e+00>, %arg
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %tmp1 = fsub fast <2 x float> <float -1.200000e+01, float -1.200000e+01>, %arg
+  %tmp2 = fadd fast <2 x float> %tmp1, <float -1.200000e+01, float -1.200000e+01>
+  ret <2 x float> %tmp2
+}
+
+define <2 x float> @test7(<2 x float> %a, <2 x float> %b) {
+; R-GVN-IC-LABEL: @test7
+; R-GVN-IC-NEXT: fadd <2 x float> %a, %b
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %tmp1 = fadd fast <2 x float> %a, <float -3.000000e+01, float -3.000000e+01>
+  %tmp2 = fadd fast <2 x float> %tmp1, %b
+  %tmp3 = fadd fast <2 x float> %tmp2, <float 3.000000e+01, float 3.000000e+01>
+  ret <2 x float> %tmp3
+}
+
+define <4 x float> @test8() {
+; CHECK-LABEL: @test8
+; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
+
+  %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+  %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
+  ret <4 x float> %tmp2
+}
+
+@fe = external global <2 x float>
+@fa = external global <2 x float>
+@fb = external global <2 x float>
+@fc = external global <2 x float>
+@ff = external global <2 x float>
+
+define void @test9() {
+; R-GVN-IC-LABEL: @test9
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC-NOT: fadd fast <2 x float>
+; R-GVN-IC: ret void
+
+  %A = load <2 x float>* @fa
+  %B = load <2 x float>* @fb
+  %C = load <2 x float>* @fc
+  %t1 = fadd fast <2 x float> %A, %B
+  %t2 = fadd fast <2 x float> %t1, %C
+  %t3 = fadd fast <2 x float> %C, %A
+  %t4 = fadd fast <2 x float> %t3, %B
+  ; e = (a+b)+c;
+  store <2 x float> %t2, <2 x float>* @fe
+  ; f = (a+c)+b
+  store <2 x float> %t4, <2 x float>* @ff
+  ret void
+}
+
+define void @test10() {
+; R-GVN-IC-LABEL: @test10
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC-NOT: fadd
+; R-GVN-IC: ret void
+
+  %A = load <2 x float>* @fa
+  %B = load <2 x float>* @fb
+  %C = load <2 x float>* @fc
+  %t1 = fadd fast <2 x float> %A, %B
+  %t2 = fadd fast <2 x float> %t1, %C
+  %t3 = fadd fast <2 x float> %C, %A
+  %t4 = fadd fast <2 x float> %t3, %B
+  ; e = c+(a+b)
+  store <2 x float> %t2, <2 x float>* @fe
+  ; f = (c+a)+b
+  store <2 x float> %t4, <2 x float>* @ff
+  ret void
+}
+
+define void @test11() {
+; R-GVN-IC-LABEL: @test11
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC: fadd fast <2 x float>
+; R-GVN-IC-NOT: fadd
+; R-GVN-IC: ret void
+
+  %A = load <2 x float>* @fa
+  %B = load <2 x float>* @fb
+  %C = load <2 x float>* @fc
+  %t1 = fadd fast <2 x float> %B, %A
+  %t2 = fadd fast <2 x float> %t1, %C
+  %t3 = fadd fast <2 x float> %C, %A
+  %t4 = fadd fast <2 x float> %t3, %B
+  ; e = c+(b+a)
+  store <2 x float> %t2, <2 x float>* @fe
+  ; f = (c+a)+b
+  store <2 x float> %t4, <2 x float>* @ff
+  ret void
+}
+
+define <2 x float> @test12(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; R-GVN-IC-LABEL: @test12
+; R-GVN-IC-NEXT: fadd fast <2 x float> %C, %B
+; R-GVN-IC-NEXT: fmul fast <2 x float> %A, %A
+; R-GVN-IC-NEXT: fmul fast <2 x float> %1, %tmp2
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %aa = fmul fast <2 x float> %A, %A
+  %aab = fmul fast <2 x float> %aa, %B
+  %ac = fmul fast <2 x float> %A, %C
+  %aac = fmul fast <2 x float> %ac, %A
+  %r = fadd fast <2 x float> %aab, %aac
+  ret <2 x float> %r
+}
+
+; (-X)*Y + Z -> Z-X*Y
+define <2 x float> @test13(<2 x float> %X, <2 x float> %Y, <2 x float> %Z) {
+; R-GVN-IC-LABEL: @test13
+; R-GVN-IC-NEXT: fmul fast <2 x float> %Y, %X
+; R-GVN-IC-NEXT: fsub fast <2 x float> %Z
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %A = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %X
+  %B = fmul fast <2 x float> %A, %Y
+  %C = fadd fast <2 x float> %B, %Z
+  ret <2 x float> %C
+}
+
+define <2 x float> @test14(<2 x float> %X) {
+; R-GVN-IC-LABEL: @test14
+; R-GVN-IC-NEXT: fmul fast <2 x float> %X, <float 9.400000e+01, float 9.400000e+01>
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %Y = fmul fast <2 x float> %X, <float 4.700000e+01, float 4.700000e+01>
+  %Z = fadd fast <2 x float> %Y, %Y
+  ret <2 x float> %Z
+}
+
+define <2 x float> @test15(<2 x float> %X) {
+; R-GVN-IC-LABEL: @test15
+; R-GVN-IC-NEXT: fmul fast <2 x float> %X, 3.000000e+00
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %Y = fadd fast <2 x float> %X ,%X
+  %Z = fadd fast <2 x float> %Y, %X
+  ret <2 x float> %Z
+}
+
+define <2 x float> @test16(<2 x float> %W) {
+; R-GVN-IC-LABEL: @test16
+; R-GVN-IC-NEXT: fmul fast <2 x float> %W, <float 3.810000e+02, float 3.810000e+02>
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %X = fmul fast <2 x float> %W, <float 127.0, float 127.0>
+  %Y = fadd fast <2 x float> %X ,%X
+  %Z = fadd fast <2 x float> %Y, %X
+  ret <2 x float> %Z
+}
+
+define <2 x float> @test17(<2 x float> %X) {
+; R-GVN-IC-LABEL: @test17
+; R-GVN-IC-NEXT: fmul fast <2 x float> %X, -3.000000e+00
+; R-GVN-IC-NEXT: fadd fast <2 x float> %factor, 6.000000e+00
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %A = fsub fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %X
+  %B = fsub fast <2 x float> <float 2.000000e+00, float 2.000000e+00>, %X
+  %C = fsub fast <2 x float> <float 3.000000e+00, float 3.000000e+00>, %X
+  %Y = fadd fast <2 x float> %A ,%B
+  %Z = fadd fast <2 x float> %Y, %C
+  ret <2 x float> %Z
+}
+
+; -X1*X2 + X1*X3 -> X1*(X3-X2)
+define <2 x float> @test18(<2 x float> %X1, <2 x float> %X2, <2 x float> %X3) {
+; R-GVN-IC-LABEL: @test18
+; R-GVN-IC-NEXT: fsub fast <2 x float> %X3, %X2
+; R-GVN-IC-NEXT: fmul fast <2 x float> {{.*}}, %X1
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %A = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %X1
+  %B = fmul fast <2 x float> %A, %X2
+  %C = fmul fast <2 x float> %X1, %X3
+  %D = fadd fast <2 x float> %B, %C
+  ret <2 x float> %D
+}
+
+; X1*47 + X2*-47 -> 47*(X1-X2)
+define <2 x float> @test19(<2 x float> %X1, <2 x float> %X2) {
+; R-GVN-IC-LABEL: @test19
+; R-GVN-IC-NEXT: fsub fast <2 x float> %X1, %X2
+; R-GVN-IC-NEXT: fmul fast <2 x float> %tmp, <float 4.700000e+01, float 4.700000e+01>
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %B = fmul fast <2 x float> %X1, <float 4.700000e+01, float 4.700000e+01>
+  %C = fmul fast <2 x float> %X2, <float -4.700000e+01, float -4.700000e+01>
+  %D = fadd fast <2 x float> %B, %C
+  ret <2 x float> %D
+}
+
+define <2 x float> @test20(<2 x float> %arg) {
+; R-GVN-IC-LABEL: @test20
+; R-GVN-IC-NEXT: fmul fast <2 x float> %arg, <float 1.440000e+02, float 1.440000e+02>
+; R-GVN-IC-NEXT: ret <2 x float> %tmp2
+
+  %tmp1 = fmul fast <2 x float> <float 1.200000e+01, float 1.200000e+01>, %arg
+  %tmp2 = fmul fast <2 x float> %tmp1, <float 1.200000e+01, float 1.200000e+01>
+  ret <2 x float> %tmp2
+}
+
+; (b+(a+1234))+-a -> b+1234
+define <2 x float> @test21(<2 x float> %b, <2 x float> %a) {
+; R-GVN-IC-LABEL: @test21
+; R-GVN-IC-NEXT: fadd fast <2 x float> %b, <float 1.234000e+03, float 1.234000e+03>
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %1 = fadd fast <2 x float> %a, <float 1.234000e+03, float 1.234000e+03>
+  %2 = fadd fast <2 x float> %b, %1
+  %3 = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %a
+  %4 = fadd fast <2 x float> %2, %3
+  ret <2 x float> %4
+}
+
+; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
+define <2 x float> @test22(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
+; R-GVN-IC-LABEL: @test22
+; R-GVN-IC-NEXT: fmul fast <2 x float> %a, <float 1.234500e+04, float 1.234500e+04>
+; R-GVN-IC-NEXT: fmul fast <2 x float> %e, %b
+; R-GVN-IC-NEXT: fmul fast <2 x float> %f, %z
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %z
+  %d = fmul fast <2 x float> %a, %b
+  %e = fmul fast <2 x float> %c, %d
+  %f = fmul fast <2 x float> %e, <float 1.234500e+04, float 1.234500e+04>
+  %g = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %f
+  ret <2 x float> %g
+}
+
+define <2 x float> @test23(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
+; R-GVN-IC-LABEL: @test23
+; R-GVN-IC-NEXT: fmul fast <2 x float> %a, 4.000000e+01
+; R-GVN-IC-NEXT: fmul fast <2 x float> %e, %z
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
+  %c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
+  %e = fmul fast <2 x float> %a, %c
+  %f = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %e
+  ret <2 x float> %f
+}
+
+; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
+define <2 x float> @test24(<2 x float> %A, <2 x float> %B) {
+; R-GVN-IC-LABEL: @test24
+; R-GVN-IC-NEXT: fsub fast <2 x float> %A, %B
+; R-GVN-IC-NEXT: ret <2 x float>
+
+  %X = fadd fast <2 x float> <float -1.200000e+01, float -1.200000e+01>, %A
+  %Y = fsub fast <2 x float> %X, %B
+  %Z = fadd fast <2 x float> %Y, <float 1.200000e+01, float 1.200000e+01>
+  ret <2 x float> %Z
+}
+
+; With sub reassociation, constant folding can eliminate the uses of %a.
+define <2 x float> @test25(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; R-GVN-IC-LABEL: @test25
+; R-GVN-IC-NEXT: fsub fast <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
+; R-GVN-IC-NEXT: fsub fast <2 x float> %b.neg, %c
+; R-GVN-IC-NEXT: ret <2 x float>
+
+; FIXME: Should be able to generate the below, which may expose more
+;        opportunites for FAdd reassociation.
+; %sum = fadd fast <2 x float> %c, %b
+; %tmp7 = fsub fast <2 x float> 0, %sum
+
+  %tmp3 = fsub fast <2 x float> %a, %b
+  %tmp5 = fsub fast <2 x float> %tmp3, %c
+  %tmp7 = fsub fast <2 x float> %tmp5, %a
+  ret <2 x float> %tmp7
+}
+
+define <2 x float> @test26(<2 x float> %x, <2 x float> %y) {
+; R-GVN-IC-LABEL: @test26
+; R-GVN-IC-NEXT: %factor = fmul fast <2 x float> <float 2.000000e+00, float 2.000000e+00>, %y
+; R-GVN-IC-NEXT: %tmp1 = fmul fast <2 x float> %factor, %x
+; R-GVN-IC-NEXT: ret <2 x float> %tmp1
+
+  %1 = fmul fast <2 x float> %x, %y
+  %2 = fmul fast <2 x float> %y, %x
+  %3 = fadd fast <2 x float> %1, %2
+  ret <2 x float> %3
+}
+
+define <2 x float> @test27(<2 x float> %A, <2 x float> %B) {
+; R-IC-LABEL: @test27
+; R-IC: %Z = fadd fast <2 x float> %A, %B
+; R-IC: ret <2 x float> %Z
+
+	%W = fadd fast <2 x float> %B, <float -5.000000e+00, float -5.000000e+00>
+	%Y = fadd fast <2 x float> %A, <float 5.000000e+00, float 5.000000e+00>
+	%Z = fadd fast <2 x float> %W, %Y
+	ret <2 x float> %Z
+}
+
+