diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1829,6 +1829,70 @@
     break;
   }
+  case Intrinsic::matrix_multiply: {
+    // Optimize negation in matrix multiplication.
+    // If we have a negated operand whose element count is larger than that of
+    // the second operand or of the result, we can reduce the cost of the
+    // negation by moving it to the smallest value in the expression:
+    // Case 1: Both operands are negated
+    //   -A * -B = A * B
+    // Case 2: The second operand has the smallest element count, i.e.
+    //   (-A) * B = A * (-B)
+    // Case 3: The result has the smallest element count
+    //   (-A) * B = -(A * B)
+
+    Value *Op0 = II->getArgOperand(0);
+    Value *Op1 = II->getArgOperand(1);
+    Value *Op0NotNeg, *Op1NotNeg;
+    if (match(Op0, m_FNeg(m_Value(Op0NotNeg))) &&
+        match(Op1, m_FNeg(m_Value(Op1NotNeg)))) {
+      replaceOperand(*II, 0, Op0NotNeg);
+      replaceOperand(*II, 1, Op1NotNeg);
+      return II;
+    }
+
+    VectorType *RetType = cast<VectorType>(II->getType());
+    Value *OpNotNeg, *FirstOperand, *SecondOperand;
+    unsigned SecondOperandArg;
+    unsigned FirstOperandArg;
+    if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
+      FirstOperand = Op0;
+      SecondOperand = Op1;
+      FirstOperandArg = 0;
+      SecondOperandArg = 1;
+    } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
+      FirstOperand = Op1;
+      SecondOperand = Op0;
+      FirstOperandArg = 1;
+      SecondOperandArg = 0;
+    } else {
+      break;
+    }
+
+    VectorType *FNegType = cast<VectorType>(FirstOperand->getType());
+    VectorType *SecondOperandType = cast<VectorType>(SecondOperand->getType());
+    if (ElementCount::isKnownGT(FNegType->getElementCount(),
+                                SecondOperandType->getElementCount()) &&
+        ElementCount::isKnownLT(SecondOperandType->getElementCount(),
+                                RetType->getElementCount())) {
+
+      Value *InverseSecondOp = Builder.CreateFNeg(SecondOperand);
+      replaceOperand(*II, FirstOperandArg, OpNotNeg);
+      replaceOperand(*II, SecondOperandArg, InverseSecondOp);
+      return II;
+    }
+    if (ElementCount::isKnownGT(FNegType->getElementCount(),
+                                RetType->getElementCount())) {
+      replaceOperand(*II, FirstOperandArg, OpNotNeg);
+      // Insert after call instruction.
+      Builder.SetInsertPoint(II->getNextNode());
+      Instruction *FNegInst = cast<Instruction>(Builder.CreateFNeg(II));
+      replaceInstUsesWith(*II, FNegInst);
+      FNegInst->setOperand(0, II);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::fmuladd: {
     // Canonicalize fast fmuladd to the separate fmul + fadd.
     if (II->isFast()) {
diff --git a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
--- a/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
+++ b/llvm/test/Transforms/InstCombine/matrix-multiplication-negation.ll
@@ -4,9 +4,9 @@
 ; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
 define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @test_negation_move_to_result(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x double> [[RES]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
 ;
   %a.neg = fneg <6 x double> %a
   %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
@@ -17,9 +17,9 @@
 ; Fast flag should be preserved
 define <2 x double> @test_negation_move_to_result_with_fastflags(<6 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x double> [[RES]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
 ;
   %a.neg = fneg <6 x double> %a
   %res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
@@ -29,8 +29,8 @@
 ; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
 define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @test_move_negation_to_second_operand(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %a.neg = fneg <27 x double> %a
@@ -42,8 +42,8 @@
 ; Fast flag should be preserved
 define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %a.neg = fneg <27 x double> %a
@@ -54,9 +54,9 @@
 ; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
 define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x double> %a, <6 x double> %b){
 ; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <6 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B_NEG]], i32 1, i32 3, i32 2)
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B:%.*]], i32 1, i32 3, i32 2)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x double> [[RES]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
 ;
   %b.neg = fneg <6 x double> %b
   %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2)
@@ -66,8 +66,8 @@
 ; %a has the fewest vector elements between the result and the two operands so the negation can be moved there
 define <9 x double> @test_move_negation_to_first_operand(<3 x double> %a, <27 x double> %b) {
 ; CHECK-LABEL: @test_move_negation_to_first_operand(
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[TMP1]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %b.neg = fneg <27 x double> %b
@@ -114,9 +114,7 @@
 ; both negations can be deleted
 define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated1(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
 ; CHECK-NEXT: ret <2 x double> [[RES]]
 ;
   %a.neg = fneg <6 x double> %a
@@ -128,9 +126,7 @@
 ; both negations will be removed
 define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated2(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %a.neg = fneg <27 x double> %a
@@ -142,9 +138,7 @@
 ; both negations will be removed
 define <9 x double> @test_with_two_operands_negated_with_fastflags(<27 x double> %a, <3 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %a.neg = fneg <27 x double> %a
@@ -156,9 +150,7 @@
 ; both negations should be removed
 define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){
 ; CHECK-LABEL: @test_with_two_operands_negated2_commute(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <27 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A_NEG]], <27 x double> [[B_NEG]], i32 1, i32 3, i32 9)
+; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
 ; CHECK-NEXT: ret <9 x double> [[RES]]
 ;
   %a.neg = fneg <3 x double> %a
@@ -169,9 +161,7 @@
 
 define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <2 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <2 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A_NEG]], <2 x double> [[B_NEG]], i32 2, i32 1, i32 2)
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 2, i32 1, i32 2)
 ; CHECK-NEXT: ret <4 x double> [[RES]]
 ;
   %a.neg = fneg <2 x double> %a
@@ -182,11 +172,9 @@
 
 define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
 ; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 2, i32 3, i32 1)
-; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> undef, <2 x i32>
-; CHECK-NEXT: [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32>
+; CHECK-NEXT: [[RES_3:%.*]] = fsub <2 x double> [[RES]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x double> [[RES_3]]
 ;
   %a.neg = fneg <6 x double> %a
@@ -202,7 +190,7 @@
 ; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2(
 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
 ; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B_NEG]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A]], <3 x double> [[B]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256
 ; CHECK-NEXT: store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32
 ; CHECK-NEXT: ret <9 x double> [[RES]]
@@ -217,10 +205,10 @@
 
 define <12 x double> @fneg_with_multiple_uses(<15 x double> %a, <20 x double> %b){
 ; CHECK-LABEL: @fneg_with_multiple_uses(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
-; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> undef, <12 x i32>
-; CHECK-NEXT: [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <12 x double> [[RES]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <15 x double> [[A]], <15 x double> poison, <12 x i32>
+; CHECK-NEXT: [[RES_3:%.*]] = fsub <12 x double> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: ret <12 x double> [[RES_3]]
 ;
   %a.neg = fneg <15 x double> %a
@@ -234,9 +222,10 @@
 define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double> %b, ptr %a_loc){
 ; CHECK-LABEL: @fneg_with_multiple_uses_2(
 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
+; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <12 x double> [[RES]]
 ; CHECK-NEXT: store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128
-; CHECK-NEXT: ret <12 x double> [[RES]]
+; CHECK-NEXT: ret <12 x double> [[TMP1]]
 ;
   %a.neg = fneg <15 x double> %a
   %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
@@ -246,8 +235,8 @@
 ; negation should be moved to the second operand given it has the smallest operand count
 define <72 x double> @chain_of_matrix_mutliplies(<27 x double> %a, <3 x double> %b, <8 x double> %c) {
 ; CHECK-LABEL: @chain_of_matrix_mutliplies(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A_NEG]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
 ; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
 ; CHECK-NEXT: ret <72 x double> [[RES_2]]
 ;
@@ -261,11 +250,11 @@
 ; second negation should be moved to the result of the second multipication
 define <6 x double> @chain_of_matrix_mutliplies_with_two_negations(<3 x double> %a, <5 x double> %b, <10 x double> %c) {
 ; CHECK-LABEL: @chain_of_matrix_mutliplies_with_two_negations(
-; CHECK-NEXT: [[B_NEG:%.*]] = fneg <5 x double> [[B:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B_NEG]], i32 3, i32 1, i32 5)
-; CHECK-NEXT: [[RES_NEG:%.*]] = fneg <15 x double> [[RES]]
-; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES_NEG]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
-; CHECK-NEXT: ret <6 x double> [[RES_2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[TMP1]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
+; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = fneg <6 x double> [[RES_2]]
+; CHECK-NEXT: ret <6 x double> [[TMP2]]
 ;
   %b.neg = fneg <5 x double> %b
   %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5)
@@ -277,10 +266,10 @@
 ; negation should be propagated to the result of the second matrix multiplication
 define <6 x double> @chain_of_matrix_mutliplies_propagation(<15 x double> %a, <20 x double> %b, <8 x double> %c){
 ; CHECK-LABEL: @chain_of_matrix_mutliplies_propagation(
-; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
+; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
 ; CHECK-NEXT: [[RES_2:%.*]] = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[RES]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
-; CHECK-NEXT: ret <6 x double> [[RES_2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <6 x double> [[RES_2]]
+; CHECK-NEXT: ret <6 x double> [[TMP1]]
 ;
   %a.neg = fneg <15 x double> %a
   %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
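
Reviewer note (not part of the patch): below is a minimal standalone reproducer, in the spirit of the tests above, for case 3 of the new fold, where the negated 6-element operand is larger than the 2-element result so the fneg is expected to sink below the multiply. The function name and RUN line are illustrative assumptions; only the intrinsic signature is taken from the existing tests.

; RUN: opt -passes=instcombine -S %s | FileCheck %s

declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg)

; The <6 x double> fneg disappears and a <2 x double> fneg of the call result takes its place.
; CHECK-LABEL: @negation_sinks_to_result(
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a, <3 x double> %b, i32 2, i32 3, i32 1)
; CHECK-NEXT: [[NEG:%.*]] = fneg <2 x double> [[RES]]
; CHECK-NEXT: ret <2 x double> [[NEG]]
define <2 x double> @negation_sinks_to_result(<6 x double> %a, <3 x double> %b) {
  %a.neg = fneg <6 x double> %a
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}

Cases 1 and 2 can be exercised the same way by negating both operands, or by making the non-negated operand the smallest value in the expression, as the in-tree tests above do.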