InstCombine: try to simplify the multiply hidden inside an fma-style call.

InstCombineCalls.cpp: run SimplifyFMulInst on the call's first two operands
(using the call's fast-math flags). If the simplified value is itself an fmul,
its two operands are written back into the call as operands 0 and 1 and the
call is kept; otherwise the call is replaced by an fadd of the simplified value
and the third operand, with the call's fast-math flags copied over. The
dedicated `fma x, 1, z -> fadd x, z` fold is removed from its old position.

NOTE(review): re-using the operands of an already existing fmul drops that
fmul's intermediate rounding step -- confirm this is acceptable for llvm.fma.

Tests: fma.ll updates two fmuladd CHECK lines (operand order) and adds
fma/fmuladd cases with a zeroinitializer multiplicand, both with `nnan nsz`
(folds to %b) and with one of the flags missing (call is kept).
pr43176-move-to-new-latch.ll gains a second function, @c.

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2240,6 +2240,22 @@
     if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
       return I;
 
+    // Try to simplify the underlying FMul.
+    if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+                                    II->getFastMathFlags(),
+                                    SQ.getWithInstruction(II))) {
+      Value *LHS, *RHS;
+      if (match(V, m_FMul(m_Value(LHS), m_Value(RHS)))) {
+        II->setArgOperand(0, LHS);
+        II->setArgOperand(1, RHS);
+        return II;
+      }
+
+      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
+      FAdd->copyFastMathFlags(II);
+      return FAdd;
+    }
+
     // fma fneg(x), fneg(y), z -> fma x, y, z
     Value *Src0 = II->getArgOperand(0);
     Value *Src1 = II->getArgOperand(1);
@@ -2258,15 +2274,9 @@
       return II;
     }
 
-    // fma x, 1, z -> fadd x, z
-    if (match(Src1, m_FPOne())) {
-      auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
-      FAdd->copyFastMathFlags(II);
-      return FAdd;
-    }
-
     break;
   }
+
   case Intrinsic::fabs: {
     Value *Cond;
     Constant *LHS, *RHS;
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -182,7 +182,7 @@
 
 define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[FMULADD]]
 ;
@@ -194,7 +194,7 @@
 
 define float @fmuladd_unary_fneg_x_unary_fneg_y_fast(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fmuladd_unary_fneg_x_unary_fneg_y_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[FMULADD]]
 ;
@@ -369,5 +369,72 @@
   ret float %fmuladd
 }
 
+define <2 x double> @fmuladd_a_0_b(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fmuladd_a_0_b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
+;
+entry:
+  %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <2 x double> @fmuladd_0_a_b(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fmuladd_0_a_b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
+;
+entry:
+  %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <2 x double> @fmuladd_a_0_b_missing_flags(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fmuladd_a_0_b_missing_flags(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+entry:
+  %res = call nnan <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @fma_a_0_b(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fma_a_0_b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
+;
+entry:
+  %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <2 x double> @fma_0_a_b(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fma_0_a_b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <2 x double> [[B:%.*]]
+;
+entry:
+  %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <2 x double> @fma_0_a_b_missing_flags(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fma_0_a_b_missing_flags(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[RES]]
+;
+entry:
+  %res = call nsz <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
--- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
@@ -68,3 +68,70 @@
 for.cond.for.end5_crit_edge:                      ; preds = %for.inc3
   ret void
 }
+
+define void @c() {
+; CHECK-LABEL: @c(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY2_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    br label [[FOR_BODY2_SPLIT:%.*]]
+; CHECK:       for.body2.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
+; CHECK:       for.body2:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_PREHEADER]]
+; CHECK:       for.body2.split:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    store i32 undef, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT:    br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
+; CHECK:       for.inc.split:
+; CHECK-NEXT:    [[LCSSA_:%.*]] = phi i1 [ [[CMP]], [[FOR_INC3]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    br i1 [[LCSSA_]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
+; CHECK:       for.cond1.for.end_crit_edge:
+; CHECK-NEXT:    br label [[FOR_INC3]]
+; CHECK:       for.inc3:
+; CHECK-NEXT:    [[INC4]] = add nsw i32 [[INC41]], 1
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
+; CHECK:       for.cond.for.end5_crit_edge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc3, %entry
+  %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ]
+  br label %for.body2
+
+for.body2:                                        ; preds = %for.inc, %for.body
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body2
+  %idxprom = sext i32 %inc41 to i64
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp slt i32 %lsr.iv, 4
+  %cmp.ext = zext i1 %cmp to i32
+  store i32 %cmp.ext, i32* %arrayidx, align 4
+  %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1
+  br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge
+
+for.cond1.for.end_crit_edge:                      ; preds = %for.inc
+  br label %for.inc3
+
+for.inc3:                                         ; preds = %for.cond1.for.end_crit_edge
+  %inc4 = add nsw i32 %inc41, 1
+  br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge
+
+for.cond.for.end5_crit_edge:                      ; preds = %for.inc3
+  ret void
+}