diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -302,6 +302,19 @@
   Type *LHSType = LHSVal->getType();
   Type *RHSType = RHSVal->getType();
 
+  // Don't break up floating-point multiplies and adds if they can be
+  // contracted: bail out when this is a contractable fadd with a contractable
+  // fmul operand, so the pair stays together and can still be fused.
+  if (Opc == Instruction::FAdd && FirstInst->hasAllowContract()) {
+    Instruction *LHSInstr = dyn_cast<Instruction>(LHSVal);
+    Instruction *RHSInstr = dyn_cast<Instruction>(RHSVal);
+    if ((LHSInstr && LHSInstr->getOpcode() == Instruction::FMul &&
+         LHSInstr->hasAllowContract()) ||
+        (RHSInstr && RHSInstr->getOpcode() == Instruction::FMul &&
+         RHSInstr->hasAllowContract()))
+      return nullptr;
+  }
+
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
diff --git a/llvm/test/Transforms/InstCombine/phi-preserve-fmas.ll b/llvm/test/Transforms/InstCombine/phi-preserve-fmas.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-preserve-fmas.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+; Check that instcombine keeps each 'fmul fast' next to the 'fadd fast' that
+; consumes it (bb15, bb31, bb39) instead of moving the fadd past the phi
+; nodes that merge the results, so the pairs remain contractable.
+
+%0 = type { double, double, i32 }
+
+define dso_local double @test(%0* %arg, %0* %arg1, i32 signext %arg2) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[I:%.*]] = phi double [ 0.000000e+00, [[BB:%.*]] ], [ [[I52:%.*]], [[BB55:%.*]] ]
+; CHECK-NEXT:    [[I4:%.*]] = phi i32 [ 0, [[BB]] ], [ [[I56:%.*]], [[BB55]] ]
+; CHECK-NEXT:    [[I5:%.*]] = phi %0* [ [[ARG1:%.*]], [[BB]] ], [ [[I53:%.*]], [[BB55]] ]
+; CHECK-NEXT:    [[I6:%.*]] = phi %0* [ [[ARG:%.*]], [[BB]] ], [ [[I54:%.*]], [[BB55]] ]
+; CHECK-NEXT:    [[I7:%.*]] = icmp slt i32 [[I4]], [[ARG2:%.*]]
+; CHECK-NEXT:    br i1 [[I7]], label [[BB9:%.*]], label [[BB8:%.*]]
+; CHECK:       bb8:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb9:
+; CHECK-NEXT:    [[I10:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[I6]], i64 0, i32 2
+; CHECK-NEXT:    [[I11:%.*]] = load i32, i32* [[I10]], align 8
+; CHECK-NEXT:    [[I12:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 0, i32 2
+; CHECK-NEXT:    [[I13:%.*]] = load i32, i32* [[I12]], align 8
+; CHECK-NEXT:    [[I14:%.*]] = icmp eq i32 [[I11]], [[I13]]
+; CHECK-NEXT:    br i1 [[I14]], label [[BB15:%.*]], label [[BB25:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    [[I16:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I6]], i64 0, i32 0
+; CHECK-NEXT:    [[I17:%.*]] = load double, double* [[I16]], align 8
+; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 0, i32 0
+; CHECK-NEXT:    [[I19:%.*]] = load double, double* [[I18]], align 8
+; CHECK-NEXT:    [[I20:%.*]] = fsub fast double [[I17]], [[I19]]
+; CHECK-NEXT:    [[I21:%.*]] = fmul fast double [[I20]], [[I20]]
+; CHECK-NEXT:    [[I22:%.*]] = fadd fast double [[I]], [[I21]]
+; CHECK-NEXT:    [[I23:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I6]], i64 1
+; CHECK-NEXT:    [[I24:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 1
+; CHECK-NEXT:    br label [[BB51:%.*]]
+; CHECK:       bb25:
+; CHECK-NEXT:    [[I26:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I6]], i64 0, i32 2
+; CHECK-NEXT:    [[I27:%.*]] = load i32, i32* [[I26]], align 8
+; CHECK-NEXT:    [[I28:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 0, i32 2
+; CHECK-NEXT:    [[I29:%.*]] = load i32, i32* [[I28]], align 8
+; CHECK-NEXT:    [[I30:%.*]] = icmp slt i32 [[I27]], [[I29]]
+; CHECK-NEXT:    br i1 [[I30]], label [[BB31:%.*]], label [[BB39:%.*]]
+; CHECK:       bb31:
+; CHECK-NEXT:    [[I32:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I6]], i64 0, i32 0
+; CHECK-NEXT:    [[I33:%.*]] = load double, double* [[I32]], align 8
+; CHECK-NEXT:    [[I36:%.*]] = fmul fast double [[I33]], [[I33]]
+; CHECK-NEXT:    [[I37:%.*]] = fadd fast double [[I]], [[I36]]
+; CHECK-NEXT:    [[I38:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I6]], i64 1
+; CHECK-NEXT:    br label [[BB47:%.*]]
+; CHECK:       bb39:
+; CHECK-NEXT:    [[I40:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 0, i32 0
+; CHECK-NEXT:    [[I41:%.*]] = load double, double* [[I40]], align 8
+; CHECK-NEXT:    [[I44:%.*]] = fmul fast double [[I41]], [[I41]]
+; CHECK-NEXT:    [[I45:%.*]] = fadd fast double [[I]], [[I44]]
+; CHECK-NEXT:    [[I46:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I5]], i64 1
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb47:
+; CHECK-NEXT:    [[I48:%.*]] = phi double [ [[I37]], [[BB31]] ], [ [[I45]], [[BB39]] ]
+; CHECK-NEXT:    [[I49:%.*]] = phi %0* [ [[I5]], [[BB31]] ], [ [[I46]], [[BB39]] ]
+; CHECK-NEXT:    [[I50:%.*]] = phi %0* [ [[I38]], [[BB31]] ], [ [[I6]], [[BB39]] ]
+; CHECK-NEXT:    br label [[BB51]]
+; CHECK:       bb51:
+; CHECK-NEXT:    [[I52]] = phi double [ [[I22]], [[BB15]] ], [ [[I48]], [[BB47]] ]
+; CHECK-NEXT:    [[I53]] = phi %0* [ [[I24]], [[BB15]] ], [ [[I49]], [[BB47]] ]
+; CHECK-NEXT:    [[I54]] = phi %0* [ [[I23]], [[BB15]] ], [ [[I50]], [[BB47]] ]
+; CHECK-NEXT:    br label [[BB55]]
+; CHECK:       bb55:
+; CHECK-NEXT:    [[I56]] = add nuw nsw i32 [[I4]], 1
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb57:
+; CHECK-NEXT:    ret double [[I]]
+;
+bb:
+  br label %bb3
+
+bb3:                                              ; preds = %bb55, %bb
+  %i = phi double [ 0.000000e+00, %bb ], [ %i52, %bb55 ]
+  %i4 = phi i32 [ 0, %bb ], [ %i56, %bb55 ]
+  %i5 = phi %0* [ %arg1, %bb ], [ %i53, %bb55 ]
+  %i6 = phi %0* [ %arg, %bb ], [ %i54, %bb55 ]
+  %i7 = icmp slt i32 %i4, %arg2
+  br i1 %i7, label %bb9, label %bb8
+
+bb8:                                              ; preds = %bb3
+  br label %bb57
+
+bb9:                                              ; preds = %bb3
+  %i10 = getelementptr inbounds %0, %0* %i6, i32 0, i32 2
+  %i11 = load i32, i32* %i10, align 8
+  %i12 = getelementptr inbounds %0, %0* %i5, i32 0, i32 2
+  %i13 = load i32, i32* %i12, align 8
+  %i14 = icmp eq i32 %i11, %i13
+  br i1 %i14, label %bb15, label %bb25
+
+bb15:                                             ; preds = %bb9
+  %i16 = getelementptr inbounds %0, %0* %i6, i32 0, i32 0
+  %i17 = load double, double* %i16, align 8
+  %i18 = getelementptr inbounds %0, %0* %i5, i32 0, i32 0
+  %i19 = load double, double* %i18, align 8
+  %i20 = fsub fast double %i17, %i19
+  %i21 = fmul fast double %i20, %i20
+  %i22 = fadd fast double %i, %i21
+  %i23 = getelementptr inbounds %0, %0* %i6, i32 1
+  %i24 = getelementptr inbounds %0, %0* %i5, i32 1
+  br label %bb51
+
+bb25:                                             ; preds = %bb9
+  %i26 = getelementptr inbounds %0, %0* %i6, i32 0, i32 2
+  %i27 = load i32, i32* %i26, align 8
+  %i28 = getelementptr inbounds %0, %0* %i5, i32 0, i32 2
+  %i29 = load i32, i32* %i28, align 8
+  %i30 = icmp slt i32 %i27, %i29
+  br i1 %i30, label %bb31, label %bb39
+
+bb31:                                             ; preds = %bb25
+  %i32 = getelementptr inbounds %0, %0* %i6, i32 0, i32 0
+  %i33 = load double, double* %i32, align 8
+  %i34 = getelementptr inbounds %0, %0* %i6, i32 0, i32 0
+  %i35 = load double, double* %i34, align 8
+  %i36 = fmul fast double %i33, %i35
+  %i37 = fadd fast double %i, %i36
+  %i38 = getelementptr inbounds %0, %0* %i6, i32 1
+  br label %bb47
+
+bb39:                                             ; preds = %bb25
+  %i40 = getelementptr inbounds %0, %0* %i5, i32 0, i32 0
+  %i41 = load double, double* %i40, align 8
+  %i42 = getelementptr inbounds %0, %0* %i5, i32 0, i32 0
+  %i43 = load double, double* %i42, align 8
+  %i44 = fmul fast double %i41, %i43
+  %i45 = fadd fast double %i, %i44
+  %i46 = getelementptr inbounds %0, %0* %i5, i32 1
+  br label %bb47
+
+bb47:                                             ; preds = %bb39, %bb31
+  %i48 = phi double [ %i37, %bb31 ], [ %i45, %bb39 ]
+  %i49 = phi %0* [ %i5, %bb31 ], [ %i46, %bb39 ]
+  %i50 = phi %0* [ %i38, %bb31 ], [ %i6, %bb39 ]
+  br label %bb51
+
+bb51:                                             ; preds = %bb47, %bb15
+  %i52 = phi double [ %i22, %bb15 ], [ %i48, %bb47 ]
+  %i53 = phi %0* [ %i24, %bb15 ], [ %i49, %bb47 ]
+  %i54 = phi %0* [ %i23, %bb15 ], [ %i50, %bb47 ]
+  br label %bb55
+
+bb55:                                             ; preds = %bb51
+  %i56 = add nsw i32 %i4, 1
+  br label %bb3
+
+bb57:                                             ; preds = %bb8
+  ret double %i
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0