diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1829,6 +1829,23 @@
 
     break;
   }
+  case Intrinsic::matrix_multiply: {
+    // Cancel a pair of negations on the multiplicands:
+    //   (-A) * (-B) --> A * B
+    // Capture A and B through the matcher instead of reading operand 0 of
+    // each argument: m_FNeg also accepts the `fsub -0.0, X` spelling, where
+    // the negated value is operand 1, not operand 0.
+    Value *A, *B;
+    if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
+        match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
+      // Rewrite the existing call in place rather than cloning it; returning
+      // II tells the caller that the instruction was modified.
+      replaceOperand(*II, 0, A);
+      replaceOperand(*II, 1, B);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::fmuladd: {
     // Canonicalize fast fmuladd to the separate fmul + fadd.
     if (II->isFast()) {
diff --git a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
--- a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
+++ b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
@@ -114,10 +114,8 @@
 ; both negations can be deleted
 define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated1(
-; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %a.neg = fneg <6 x double> %a
   %b.neg = fneg <3 x double> %b
@@ -128,10 +126,8 @@
 ; both negations will appear on `%b` other passes should optimise ~~b to b
 define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated2(
-; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
-; CHECK-NEXT:    ret <9 x double> [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
+; CHECK-NEXT:    ret <9 x double> [[TMP1]]
 ;
   %a.neg = fneg <27 x double> %a
   %b.neg = fneg <3 x double> %b
@@ -142,10 +138,8 @@
 ; reverse of the previous case both negations should appear on %a
 define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated2_commute(
-; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]]
-; CHECK-NEXT:    [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A_NEG]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9)
-; CHECK-NEXT:    ret <9 x double> [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
+; CHECK-NEXT:    ret <9 x double> [[TMP1]]
 ;
   %a.neg = fneg <3 x double> %a
   %b.neg = fneg <27 x double> %b
@@ -154,10 +148,8 @@
 }
 define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
-; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <2 x double> [[A:%.*]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <2 x double> [[B:%.*]]
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A_NEG]], <2 x double> [[B_NEG]], i32 2, i32 1, i32 2)
-; CHECK-NEXT:    ret <4 x double> [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 2, i32 1, i32 2)
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a.neg = fneg <2 x double> %a
   %b.neg = fneg <2 x double> %b
@@ -167,11 +159,9 @@
 
 define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
-; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
-; CHECK-NEXT:    [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> undef, <2 x i32>
-; CHECK-NEXT:    [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32>
+; CHECK-NEXT:    [[RES_3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x double> [[RES_3]]
 ;
   %a.neg = fneg <6 x double> %a