Index: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -647,12 +648,27 @@ } } + // Sink division: (X / Y) * Z --> (X * Z) / Y + Value *FDiv; Value *Z; - if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), - m_Value(Z)))) { - // Sink division: (X / Y) * Z --> (X * Z) / Y - Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I); - return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I); + if (match(&I, + m_c_FMul(m_CombineAnd(m_Value(FDiv), + m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))), + m_Value(Z)))) { + // If we know the fmul lives in a loop then only sink the fdiv if we can + // prove it isn't loop invariant. We'd like to avoid putting an expensive + // math op into a loop that it doesn't need to be in. + // Otherwise, we only attempt to combine the fdiv and fmul if we know they + // live in the same block. + Loop *L = LI ? LI->getLoopFor(I.getParent()) : nullptr; + // The fdiv should always be an instruction so the cast is safe. + bool ShouldSink = + L ? 
!L->isLoopInvariant(FDiv) + : cast<Instruction>(FDiv)->getParent() == I.getParent(); + if (ShouldSink) { + Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I); + return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I); + } } // sqrt(X) * sqrt(Y) -> sqrt(X * Y) Index: llvm/test/Transforms/InstCombine/fmul.ll =================================================================== --- llvm/test/Transforms/InstCombine/fmul.ll +++ llvm/test/Transforms/InstCombine/fmul.ll @@ -1051,9 +1051,12 @@ ret float %mul } +; In this case the fdiv doesn't get sunk into the loop because +; the fmul and fdiv live in different blocks. define void @fmul_loop_invariant_fdiv(float* %a, float %x) { ; CHECK-LABEL: @fmul_loop_invariant_fdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = fdiv fast float 1.000000e+00, [[X:%.*]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -1062,7 +1065,7 @@ ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] ; CHECK-NEXT: [[F:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[M:%.*]] = fdiv fast float [[F]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = fmul fast float [[F]], [[D]] ; CHECK-NEXT: store float [[M]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], 1024 @@ -1087,6 +1090,23 @@ br i1 %cmp.not, label %for.cond.cleanup, label %for.body } +define void @fmul_fdiv_same_block(float* %a, float %x) { +; CHECK-LABEL: @fmul_fdiv_same_block( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = load float, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[M:%.*]] = fdiv fast float [[F]], [[X:%.*]] +; CHECK-NEXT: store float [[M]], ptr [[A]], align 4 +; CHECK-NEXT: ret void +; +entry: + %d = fdiv fast float 1.0, %x + %arrayidx = getelementptr inbounds float, float* %a, i64 0 + %f = load float, float* %arrayidx, align 4 + %m = fmul fast float %f, %d + store float %m, 
float* %arrayidx, align 4 + ret void +} + ; Avoid infinite looping by moving negation out of a constant expression. @g = external global {[2 x ptr]}, align 1 Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll +++ llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll @@ -17,19 +17,19 @@ define void @vdiv(ptr %a, float %b) #0 { ; CHECK-LABEL: @vdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[TMP0]] -; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 
[[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -14,6 +14,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-LABEL: @vdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double 1.000000e+00, [[A:%.*]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -27,132 +28,115 @@ ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER15:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[DIV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[DIV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x double> poison, double [[DIV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT11]], <4 x double> poison, <4 x i32> zeroinitializer -; 
CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[DIV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT13]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]] -; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]] -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 4 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 8 -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 12 -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]] -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], 
align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 4 -; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 8 -; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 12 -; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 8, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP3]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 12 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP4]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[BROADCAST_SPLAT10]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[BROADCAST_SPLAT12]] +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[BROADCAST_SPLAT14]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x double> [[TMP5]], ptr [[TMP9]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 4 +; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP10]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
double, ptr [[TMP9]], i64 8 +; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[TMP11]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 12 +; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP12]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15]] ; CHECK: for.body.preheader15: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 -; CHECK-NEXT: [[TMP19:%.*]] = add nsw i64 [[TMP18]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 +; CHECK-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP14]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 7 ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]] -; CHECK: for.body.prol.preheader: -; CHECK-NEXT: [[TMP20:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL:%.*]] ; CHECK: for.body.prol: -; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi 
i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ] +; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER15]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PREHEADER15]] ] ; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_PROL]] ; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP21:%.*]] = fmul fast double [[T0_PROL]], [[TMP20]] +; CHECK-NEXT: [[MUL_PROL:%.*]] = fmul fast double [[T0_PROL]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_PROL]] -; CHECK-NEXT: store double [[TMP21]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_PROL]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.body.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER15]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] -; CHECK-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP19]], 7 -; CHECK-NEXT: br i1 [[TMP22]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15_NEW:%.*]] -; CHECK: for.body.preheader15.new: -; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP25:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP26:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: 
[[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP30:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i64 [[TMP15]], 7 +; CHECK-NEXT: br i1 [[TMP16]], label [[FOR_END]], label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER15_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_UNR]], [[FOR_BODY_PROL_LOOPEXIT]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP31:%.*]] = fmul fast double [[T0]], [[TMP23]] +; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[T0]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP32:%.*]] = fmul fast double [[T0_1]], [[TMP24]] +; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast double [[T0_1]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_1]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: 
[[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP33:%.*]] = fmul fast double [[T0_2]], [[TMP25]] +; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[T0_2]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_2]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0_3]], [[TMP26]] +; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast double [[T0_3]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_3]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_4]], [[TMP27]] +; CHECK-NEXT: [[MUL_4:%.*]] = fmul fast double [[T0_4]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]] -; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_4]], ptr [[ARRAYIDX2_4]], align 8, !tbaa 
[[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_5]], [[TMP28]] +; CHECK-NEXT: [[MUL_5:%.*]] = fmul fast double [[T0_5]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]] -; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_5]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_6]], [[TMP29]] +; CHECK-NEXT: [[MUL_6:%.*]] = fmul fast double [[T0_6]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]] -; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_6]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]] ; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP38:%.*]] = fmul fast double [[T0_7]], [[TMP30]] +; CHECK-NEXT: [[MUL_7:%.*]] = fmul fast double [[T0_7]], [[DIV]] ; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]] -; CHECK-NEXT: store double [[TMP38]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[MUL_7]], ptr 
[[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 ; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/lto-licm.ll +++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll @@ -4,6 +4,7 @@ define void @hoist_fdiv(ptr %a, float %b) { ; CHECK-LABEL: @hoist_fdiv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -12,9 +13,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]] -; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]] +; CHECK-NEXT: store float [[TMP2]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: