Index: lib/Target/README.txt
===================================================================
--- lib/Target/README.txt
+++ lib/Target/README.txt
@@ -95,44 +95,6 @@
 
 //===---------------------------------------------------------------------===//
 
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
-	movapd	%xmm1, %xmm2
-	mulsd	LCPI1_1(%rip), %xmm1
-	mulsd	LCPI1_0(%rip), %xmm2
-	addsd	%xmm0, %xmm1
-	addsd	%xmm0, %xmm2
-	movapd	%xmm1, %xmm0
-	mulsd	%xmm2, %xmm0
-	ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
-	mulsd	LCPI1_0(%rip), %xmm1
-	movapd	%xmm1, %xmm2
-	addsd	%xmm0, %xmm2
-	subsd	%xmm1, %xmm0
-	mulsd	%xmm2, %xmm0
-	ret
-
-This doesn't need -ffast-math support at all. This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
-//===---------------------------------------------------------------------===//
-
 These two functions should generate the same code on big-endian systems:
 
 int g(int *j,int *l) { return memcmp(j,l,4); }
Index: lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- lib/Transforms/Scalar/Reassociate.cpp
+++ lib/Transforms/Scalar/Reassociate.cpp
@@ -193,6 +193,8 @@
     Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
     Value *RemoveFactorFromExpression(Value *V, Value *Factor);
     void EraseInst(Instruction *I);
+    void optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
+                             int OperandNr);
     void OptimizeInst(Instruction *I);
   };
 }
@@ -1803,6 +1805,32 @@
   }
 }
 
+void Reassociate::optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
+                                      int OperandNr) {
+  // Change the sign of the constant.
+  APFloat Val = ConstOperand->getValueAPF();
+  Val.changeSign();
+  I->setOperand(0, ConstantFP::get(ConstOperand->getContext(), Val));
+
+  assert(I->hasOneUse() && "Only a single use can be replaced.");
+  Instruction *Parent = I->user_back();
+
+  assert(
+      Parent->getOpcode() == Instruction::FAdd ||
+      (Parent->getOpcode() == Instruction::FSub && Parent->getOperand(1) == I));
+  Value *OtherOperand = Parent->getOperand(1 - OperandNr);
+  BinaryOperator *NI;
+  if (Parent->getOpcode() == Instruction::FAdd)
+    NI = BinaryOperator::CreateFSub(OtherOperand, I);
+  else
+    NI = BinaryOperator::CreateFAdd(OtherOperand, I);
+  NI->insertBefore(Parent);
+  NI->setName(Parent->getName() + ".repl");
+  Parent->replaceAllUsesWith(NI);
+  NI->setDebugLoc(I->getDebugLoc());
+  MadeChange = true;
+}
+
 /// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
 /// instructions is not allowed.
 void Reassociate::OptimizeInst(Instruction *I) {
@@ -1824,25 +1852,45 @@
     I = NI;
   }
 
-  // Floating point binary operators are not associative, but we can still
-  // commute (some) of them, to canonicalize the order of their operands.
-  // This can potentially expose more CSE opportunities, and makes writing
-  // other transformations simpler.
   if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) {
-    // FAdd and FMul can be commuted.
-    if (I->getOpcode() != Instruction::FMul &&
-        I->getOpcode() != Instruction::FAdd)
-      return;
+    // Floating point binary operators are not associative, but we can still
+    // commute (some) of them, to canonicalize the order of their operands.
+    // This can potentially expose more CSE opportunities, and makes writing
+    // other transformations simpler.
 
-    Value *LHS = I->getOperand(0);
-    Value *RHS = I->getOperand(1);
-    unsigned LHSRank = getRank(LHS);
-    unsigned RHSRank = getRank(RHS);
+    // FAdd and FMul can be commuted.
+    if (I->getOpcode() == Instruction::FMul ||
+        I->getOpcode() == Instruction::FAdd) {
+      Value *LHS = I->getOperand(0);
+      Value *RHS = I->getOperand(1);
+      unsigned LHSRank = getRank(LHS);
+      unsigned RHSRank = getRank(RHS);
+
+      // Sort the operands by rank.
+      if (RHSRank < LHSRank) {
+        I->setOperand(0, RHS);
+        I->setOperand(1, LHS);
+      }
+    }
 
-    // Sort the operands by rank.
-    if (RHSRank < LHSRank) {
-      I->setOperand(0, RHS);
-      I->setOperand(1, LHS);
+    // Transform:
+    // FAdd(x, FMul(-0.1234, y)) -> FSub(x, FMul(0.1234, y))
+    // where the FMul can also be an FDiv, and FAdd can be a FSub.
+    if (I->getOpcode() == Instruction::FMul ||
+        I->getOpcode() == Instruction::FDiv) {
+      if (ConstantFP *LHSConst = dyn_cast<ConstantFP>(I->getOperand(0))) {
+        if (LHSConst->isNegative() && I->hasOneUse()) {
+          Instruction *Parent = I->user_back();
+          if (Parent->getOpcode() == Instruction::FAdd) {
+            if (Parent->getOperand(0) == I)
+              optimizeFAddNegExpr(LHSConst, I, 0);
+            else if (Parent->getOperand(1) == I)
+              optimizeFAddNegExpr(LHSConst, I, 1);
+          } else if (Parent->getOpcode() == Instruction::FSub)
+            if (Parent->getOperand(1) == I)
+              optimizeFAddNegExpr(LHSConst, I, 1);
+        }
+      }
     }
     return;
  }
 
Index: test/Transforms/Reassociate/liftsign.ll
===================================================================
--- /dev/null
+++ test/Transforms/Reassociate/liftsign.ll
@@ -0,0 +1,69 @@
+; RUN: opt -reassociate -gvn -S < %s | FileCheck %s
+
+; (x + 0.1234 * y) * (x + -0.1234 * y) -> (x + 0.1234 * y) * (x - 0.1234 * y)
+; so CSE can simplify it further
+define double @lift_sign1(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign1(
+  %mul = fmul double 1.234000e-01, %y
+  %add = fadd double %mul, %x
+  %mul1 = fmul double -1.234000e-01, %y
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul1 = fmul double -1.234000e-01, %y
+; CHECK-NOT: %add2 = fadd double %mul1, %x
+; CHECK: %add2.repl = fsub double %x, %mul
+; CHECK: %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; (x + -0.1234 * y) * (x + -0.1234 * y) -> (x - 0.1234 * y) * (x - 0.1234 * y)
+; GVN can then rewrite it even further
+define double @lift_sign2(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign2(
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add = fadd double %mul, %x
+; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add2 = fadd double %mul1, %x
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %mul = fmul double 1.234000e-01, %y
+; CHECK: %add.repl = fsub double %x, %mul
+; CHECK: %mul3 = fmul double %add.repl, %add.repl
+  ret double %mul3
+}
+
+; (x + 0.1234 * y) * (x - -0.1234 * y) -> (x + 0.1234 * y) * (x + 0.1234 * y)
+define double @lift_sign3(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign3(
+  %mul = fmul double %y, 1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fsub double %x, %mul1
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
+; CHECK-NOT: %add2 = fsub double %x, %mul1
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %mul3 = fmul double %add, %add
+  ret double %mul3
+}
+
+; (x + 0.1234 / y) * (x + -0.1234 / y) -> (x + 0.1234 / y) * (x - 0.1234 / y)
+; so CSE can simplify it further
+define double @lift_sign4(double %x, double %y) nounwind readnone ssp uwtable {
+; CHECK-LABEL: @lift_sign4(
+  %div = fdiv double 1.234000e-01, %y
+  %add = fadd double %div, %x
+  %div1 = fdiv double -1.234000e-01, %y
+  %add2 = fadd double %div1, %x
+  %mul3 = fmul double %add, %add2
+; CHECK-NOT: %div1 = fdiv double -1.234000e-01, %y
+; CHECK-NOT: %add2 = fadd double %div1, %x
+; CHECK-NOT: %mul3 = fmul double %add, %add2
+; CHECK: %add2.repl = fsub double %x, %div
+; CHECK: %mul3 = fmul double %add, %add2.repl
+  ret double %mul3
+}