Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11897,6 +11897,39 @@
   return SDValue();
 }
 
+// Is this value a constant 1.0? There are multiple ways this can be the case:
+// - A straightforward ConstantFPSDNode that is 1.0
+// - A ConstantPoolSDNode that refers to 1.0 in the constant pool
+// - A BUILD_VECTOR node whose operands are all the same value (a splat) and
+//   that value is one of the two forms above.
+// Returns false for anything else.
+static bool isConstantFPOne(SDValue Op) {
+  // For a BUILD_VECTOR, require every operand to be identical to operand 0 and
+  // then test that single splatted operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op)) {
+    SDValue Op0 = BV->getOperand(0);
+    for (unsigned i = 1, e = BV->getNumOperands(); i < e; ++i) {
+      if (BV->getOperand(i) != Op0)
+        return false;
+    }
+    Op = Op0;
+  }
+
+  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op);
+  if (CFP && CFP->isExactlyValue(1.0))
+    return true;
+
+  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+    // getConstVal() is only valid for IR constants; skip target-specific
+    // machine constant pool entries.
+    if (CP->isMachineConstantPoolEntry())
+      return false;
+    if (const ConstantFP *PoolFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+      return PoolFP->isExactlyValue(1.0);
+  }
+  return false;
+}
+
 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
 // reciprocal.
 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
@@ -11915,8 +11948,7 @@
 
   // Skip if current node is a reciprocal.
   SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  if (N0CFP && N0CFP->isExactlyValue(1.0))
+  if (isConstantFPOne(N0))
     return SDValue();
 
   // Exit early if the target does not want this transform or if there can't
Index: test/CodeGen/PowerPC/repeated-fp-divisors.ll
===================================================================
--- test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+define dso_local void @test(float %a, <4 x float>* nocapture %b) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -48(1)
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: bl test2
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdpspn 0, 1
+; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT: lvx 3, 0, 30
+; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT: lvx 2, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT: lvx 4, 0, 3
+; CHECK-NEXT: xxspltw 0, 0, 0
+; CHECK-NEXT: xvresp 1, 0
+; CHECK-NEXT: xvnmsubasp 34, 1, 0
+; CHECK-NEXT: xvmulsp 0, 35, 36
+; CHECK-NEXT: xvmaddasp 1, 1, 34
+; CHECK-NEXT: xvmulsp 34, 0, 1
+; CHECK-NEXT: stvx 2, 0, 30
+; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT: blr
+entry:
+  %call = tail call fast float @test2(float %a) #2
+  %vecinit = insertelement <4 x float> undef, float %call, i32 0
+  %vecinit3 = shufflevector <4 x float> %vecinit, <4 x float> undef, <4 x i32> zeroinitializer
+  %0 = load <4 x float>, <4 x float>* %b, align 16
+; NOTE(review): the second (constant vector) operand of the fmul below is missing from this copy of the patch; restore it from the original before applying.
+  %1 = fmul fast <4 x float> %0,
+  %mul = fdiv fast <4 x float> %1, %vecinit3
+  store <4 x float> %mul, <4 x float>* %b, align 16
+  ret void
+}
+
+declare float @test2(float)