Index: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -528,11 +528,22 @@
       }
     }
 
-    Value *Z;
-    if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
-                           m_Value(Z)))) {
-      // Sink division: (X / Y) * Z --> (X * Z) / Y
-      Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+    // Sink division: (X / Y) * Z --> (X * Z) / Y
+    // Don't sink if the fdiv is a reciprocal in a different basic block as this
+    // might pull a division into a loop undoing a transform from LICM.
+    // The block test is made on the matched fdiv itself, never on the other
+    // multiplicand, which is not guaranteed to be an Instruction.
+    if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
+        (!match(X, m_FPOne()) ||
+         cast<Instruction>(Op0)->getParent() == I.getParent())) {
+      Value *NewFMul = Builder.CreateFMulFMF(X, Op1, &I);
+      return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+    }
+    // Same fold with the fdiv as the second operand: Z * (X / Y).
+    if (match(Op1, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
+        (!match(X, m_FPOne()) ||
+         cast<Instruction>(Op1)->getParent() == I.getParent())) {
+      Value *NewFMul = Builder.CreateFMulFMF(X, Op0, &I);
       return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
     }
 
Index: llvm/test/Transforms/InstCombine/fmul.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fmul.ll
+++ llvm/test/Transforms/InstCombine/fmul.ll
@@ -1027,6 +1027,44 @@
   ret float %mul
 }
 
+; Make sure we don't sink this invariant fdiv into the loop.
+define void @fmul_loop_invariant_fdiv(float* %a, float %x) {
+; CHECK-LABEL: @fmul_loop_invariant_fdiv(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], 1024
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %0 = fdiv fast float 1.000000e+00, %x           ; reciprocal of %x, computed once before the loop
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = zext i32 %i.08 to i64
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
+  %1 = load float, float* %arrayidx, align 4
+  %2 = fmul fast float %1, %0                     ; expected to remain a multiply by the entry-block reciprocal
+  store float %2, float* %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %cmp.not = icmp eq i32 %inc, 1024
+  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
+}
+
 ; Avoid infinite looping by moving negation out of a constant expression.
 
 @g = external global {[2 x i8*]}, align 1