Index: lib/Target/README.txt
===================================================================
--- lib/Target/README.txt
+++ lib/Target/README.txt
@@ -95,44 +95,6 @@
 
 //===---------------------------------------------------------------------===//
 
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
-	movapd	%xmm1, %xmm2
-	mulsd	LCPI1_1(%rip), %xmm1
-	mulsd	LCPI1_0(%rip), %xmm2
-	addsd	%xmm0, %xmm1
-	addsd	%xmm0, %xmm2
-	movapd	%xmm1, %xmm0
-	mulsd	%xmm2, %xmm0
-	ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
-	mulsd	LCPI1_0(%rip), %xmm1
-	movapd	%xmm1, %xmm2
-	addsd	%xmm0, %xmm2
-	subsd	%xmm1, %xmm0
-	mulsd	%xmm2, %xmm0
-	ret
-
-This doesn't need -ffast-math support at all.  This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
-//===---------------------------------------------------------------------===//
-
 These two functions should generate the same code on big-endian systems:
 
 int g(int *j,int *l)  {  return memcmp(j,l,4);  }
Index: lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- lib/Transforms/Scalar/Reassociate.cpp
+++ lib/Transforms/Scalar/Reassociate.cpp
@@ -193,6 +193,8 @@
     Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
     Value *RemoveFactorFromExpression(Value *V, Value *Factor);
     void EraseInst(Instruction *I);
+    void optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
+                             int OperandNr);
     void OptimizeInst(Instruction *I);
   };
 }
@@ -1914,6 +1916,32 @@
     }
 }
 
+void Reassociate::optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
+                                      int OperandNr) {
+  // Change the sign of the constant.
+  APFloat Val = ConstOperand->getValueAPF();
+  Val.changeSign();
+  I->setOperand(0, ConstantFP::get(ConstOperand->getContext(), Val));
+
+  assert(I->hasOneUse() && "Only a single use can be replaced.");
+  Instruction *Parent = I->user_back();
+
+  assert(
+      Parent->getOpcode() == Instruction::FAdd ||
+      (Parent->getOpcode() == Instruction::FSub && Parent->getOperand(1) == I));
+  Value *OtherOperand = Parent->getOperand(1 - OperandNr);
+  BinaryOperator *NI;
+  if (Parent->getOpcode() == Instruction::FAdd)
+    NI = BinaryOperator::CreateFSub(OtherOperand, I);
+  else
+    NI = BinaryOperator::CreateFAdd(OtherOperand, I);
+  NI->insertBefore(Parent);
+  NI->setName(Parent->getName() + ".repl");
+  Parent->replaceAllUsesWith(NI);
+  NI->setDebugLoc(I->getDebugLoc());
+  MadeChange = true;
+}
+
 /// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
 /// instructions is not allowed.
 void Reassociate::OptimizeInst(Instruction *I) {
@@ -1954,6 +1982,28 @@
       }
     }
 
+    // Transform:
+    //   FAdd(x, FMul(-0.1234, y)) -> FSub(x, FMul(0.1234, y))
+    // where the FMul can also be an FDiv, and FAdd can be a FSub.
+    // This is only done for non-fast-math, as fast-math is handled separately.
+    if ((I->getOpcode() == Instruction::FMul ||
+         I->getOpcode() == Instruction::FDiv) &&
+        !cast<BinaryOperator>(I)->getFastMathFlags().any()) {
+      if (ConstantFP *LHSConst = dyn_cast<ConstantFP>(I->getOperand(0))) {
+        if (LHSConst->isNegative() && I->hasOneUse()) {
+          Instruction *Parent = I->user_back();
+          if (Parent->getOpcode() == Instruction::FAdd) {
+            if (Parent->getOperand(0) == I)
+              optimizeFAddNegExpr(LHSConst, I, 0);
+            else if (Parent->getOperand(1) == I)
+              optimizeFAddNegExpr(LHSConst, I, 1);
+          } else if (Parent->getOpcode() == Instruction::FSub)
+            if (Parent->getOperand(1) == I)
+              optimizeFAddNegExpr(LHSConst, I, 1);
+        }
+      }
+    }
+
     // FIXME: We should commute vector instructions as well.  However, this 
     // requires further analysis to determine the effect on later passes.
 
Index: test/Transforms/Reassociate/liftsign.ll
===================================================================
--- /dev/null
+++ test/Transforms/Reassociate/liftsign.ll
@@ -0,0 +1,69 @@
+; RUN: opt -reassociate -gvn -S < %s | FileCheck %s
+
+; (x + 0.1234 * y) * (x + -0.1234 * y) -> (x + 0.1234 * y) * (x - 0.1234 * y)
+; so CSE can simplify it further
+define double @lift_sign1(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign1(
+  %mul = fmul double 1.234000e-01, %y
+  %add = fadd double %mul, %x
+  %mul1 = fmul double -1.234000e-01, %y
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul1 = fmul double -1.234000e-01, %y
+; CHECK-NOT: %add2 = fadd %mul1, %x
+; CHECK: %add2.repl = fsub double %x, %mul
+; CHECK: %mul3 = fmul double %add, %add2
+ret double %mul3
+}
+
+; (x + -0.1234 * y) * (x + -0.1234 * y) -> (x - 0.1234 * y) * (x - 0.1234 * y)
+; GVN can then rewrite it even further
+define double @lift_sign2(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign2(
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add = fadd double %mul, %x
+; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add2 = fadd double %mul1, %x
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %mul = fmul double 1.234000e-01, %y
+; CHECK: %add.repl = fsub double %x, %mul
+; CHECK: %mul3 = fmul double %add.repl, %add.repl
+  ret double %mul3
+}
+
+; (x + 0.1234 * y) * (x - -0.1234 * y) -> (x + 0.1234 * y) * (x + 0.1234 * y)
+define double @lift_sign3(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign3(
+  %mul = fmul double %y, 1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fsub double %x, %mul1
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add2 = fsub double %x, %mul1
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %mul3 = fmul double %add, %add
+  ret double %mul3
+}
+
+; (x + 0.1234 / y) * (x + -0.1234 / y) -> (x + 0.1234 / y) * (x - 0.1234 / y)
+; so CSE can simplify it further
+define double @lift_sign4(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign4(
+  %div = fdiv double 1.234000e-01, %y
+  %add = fadd double %div, %x
+  %div1 = fdiv double -1.234000e-01, %y
+  %add2 = fadd double %div1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %div1 = fdiv double -1.234000e-01, %y
+; CHECK-NOT: %add2 = fadd double %div1, %x
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %add2.repl = fsub double %x, %div
+; CHECK: %mul3 = fmul double %add, %add2.repl
+  ret double %mul3
+}