diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -403,6 +403,19 @@ /// Map from instructions to their produced column matrix. MapVector Inst2ColumnMatrix; +private: + static FastMathFlags getFastMathFlags(Instruction *Inst) { + FastMathFlags FMF; + + if (isa(*Inst)) { + FMF = Inst->getFastMathFlags(); + } + + FMF.setAllowContract(AllowContractEnabled || FMF.allowContract()); + + return FMF; + } + public: LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, @@ -1149,9 +1162,8 @@ /// column-major. If \p IsScalarMatrixTransposed we assume the appropriate /// operand is transposed. void emitMatrixMultiply(MatrixTy &Result, const MatrixTy &A, - const MatrixTy &B, bool AllowContraction, - IRBuilder<> &Builder, bool IsTiled, - bool IsScalarMatrixTransposed) { + const MatrixTy &B, IRBuilder<> &Builder, bool IsTiled, + bool IsScalarMatrixTransposed, FastMathFlags FMF) { const unsigned VF = std::max( TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) .getFixedSize() / @@ -1166,6 +1178,9 @@ Result.isColumnMajor() == A.isColumnMajor() && "operands must agree on matrix layout"); unsigned NumComputeOps = 0; + + Builder.setFastMathFlags(FMF); + if (A.isColumnMajor()) { // Multiply columns from the first operand with scalars from the second // operand. Then move along the K axes and accumulate the columns. With @@ -1188,9 +1203,9 @@ B.getColumn(IsScalarMatrixTransposed ? K : J), IsScalarMatrixTransposed ? J : K); Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat"); - Sum = createMulAdd(isSumZero && K == 0 ? nullptr : Sum, L, Splat, - Result.getElementType()->isFloatingPointTy(), - Builder, AllowContraction, NumComputeOps); + Sum = + createMulAdd(isSumZero && K == 0 ? nullptr : Sum, L, Splat, + IsFP, Builder, FMF.allowContract(), NumComputeOps); } Result.setVector(J, insertVector(Result.getVector(J), I, Sum, Builder)); @@ -1215,8 +1230,9 @@ A.getVector(IsScalarMatrixTransposed ? K : I), IsScalarMatrixTransposed ? I : K); Value *Splat = Builder.CreateVectorSplat(BlockSize, LH, "splat"); - Sum = createMulAdd(isSumZero && K == 0 ? nullptr : Sum, Splat, R, - IsFP, Builder, AllowContraction, NumComputeOps); + Sum = + createMulAdd(isSumZero && K == 0 ? nullptr : Sum, Splat, R, + IsFP, Builder, FMF.allowContract(), NumComputeOps); } Result.setVector(I, insertVector(Result.getVector(I), J, Sum, Builder)); @@ -1354,8 +1370,7 @@ } void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape, - Value *RPtr, ShapeInfo RShape, StoreInst *Store, - bool AllowContract) { + Value *RPtr, ShapeInfo RShape, StoreInst *Store) { auto *EltType = cast(MatMul->getType())->getElementType(); // Create the main tiling loop nest. @@ -1391,7 +1406,8 @@ {TileSize, TileSize}, EltType, Builder); MatrixTy B = loadMatrix(RPtr, {}, false, RShape, TI.CurrentK, TI.CurrentCol, {TileSize, TileSize}, EltType, Builder); - emitMatrixMultiply(TileResult, A, B, AllowContract, Builder, true, false); + emitMatrixMultiply(TileResult, A, B, Builder, true, false, + getFastMathFlags(MatMul)); // Store result after the inner loop is done. Builder.SetInsertPoint(TI.RowLoopLatch->getTerminator()); storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(), @@ -1430,11 +1446,8 @@ Value *BPtr = getNonAliasingPointer(LoadOp1, Store, MatMul); Value *CPtr = Store->getPointerOperand(); - bool AllowContract = AllowContractEnabled || (isa(MatMul) && - MatMul->hasAllowContract()); if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0)) - createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store, - AllowContract); + createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store); else { IRBuilder<> Builder(Store); for (unsigned J = 0; J < C; J += TileSize) @@ -1453,7 +1466,8 @@ loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), RShape, Builder.getInt64(K), Builder.getInt64(J), {TileM, TileC}, EltType, Builder); - emitMatrixMultiply(Res, A, B, AllowContract, Builder, true, false); + emitMatrixMultiply(Res, A, B, Builder, true, false, + getFastMathFlags(MatMul)); } storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, Builder.getInt64(I), Builder.getInt64(J), EltType, @@ -1520,10 +1534,8 @@ // Initialize the output MatrixTy Result(R, C, EltType); - bool AllowContract = - AllowContractEnabled || - (isa(MatMul) && MatMul->hasAllowContract()); - emitMatrixMultiply(Result, MA, MB, AllowContract, Builder, false, true); + emitMatrixMultiply(Result, MA, MB, Builder, false, true, + getFastMathFlags(MatMul)); FusedInsts.insert(MatMul); FusedInsts.insert(cast(Transpose)); @@ -1578,9 +1590,8 @@ assert(Lhs.getElementType() == Result.getElementType() && "Matrix multiply result element type does not match arguments."); - bool AllowContract = AllowContractEnabled || (isa(MatMul) && - MatMul->hasAllowContract()); - emitMatrixMultiply(Result, Lhs, Rhs, AllowContract, Builder, false, false); + emitMatrixMultiply(Result, Lhs, Rhs, Builder, false, false, + getFastMathFlags(MatMul)); finalizeLowering(MatMul, Result, Builder); } @@ -1665,6 +1676,8 @@ Result.isColumnMajor() == A.isColumnMajor() && "operands must agree on matrix layout"); + Builder.setFastMathFlags(getFastMathFlags(Inst)); + // Helper to perform binary op on vectors. auto BuildVectorOp = [&Builder, Inst](Value *LHS, Value *RHS) { switch (Inst->getOpcode()) { @@ -1709,6 +1722,8 @@ MatrixTy Result; MatrixTy M = getMatrix(Op, Shape, Builder); + Builder.setFastMathFlags(getFastMathFlags(Inst)); + // Helper to perform unary op on vectors. auto BuildVectorOp = [&Builder, Inst](Value *Op) { switch (Inst->getOpcode()) { diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll @@ -14,48 +14,48 @@ ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul contract <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll @@ -14,48 +14,48 @@ ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul contract <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll @@ -14,48 +14,48 @@ ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul contract <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul contract <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll @@ -14,48 +14,48 @@ ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul contract <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul contract <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll @@ -43,18 +43,18 @@ ; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]]) +; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]]) ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP7]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP6]]) +; CHECK-NEXT: [[TMP7]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP6]]) ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]]) +; CHECK-NEXT: [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP9]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP8]]) +; CHECK-NEXT: [[TMP9]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP8]]) ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_STEP]], 4 @@ -140,7 +140,7 @@ ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_IV]], 0 @@ -232,7 +232,7 @@ ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP3:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_STEP]], 4 @@ -344,18 +344,18 @@ ; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast float* [[VEC_GEP13]] to <2 x float>* ; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST14]], align 4 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]]) +; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP19]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP18]]) +; CHECK-NEXT: [[TMP19]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP18]]) ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]]) +; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]]) ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP21]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP20]]) +; CHECK-NEXT: [[TMP21]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP20]]) ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0 -; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND_NOT:%.*]] = icmp eq i64 [[ROWS_IV]], 0 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll @@ -67,13 +67,13 @@ ; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast double* [[VEC_GEP13]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST14]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT18]], <2 x double> [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT18]], <2 x double> [[TMP12]]) ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP14:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT24]], <2 x double> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT24]], <2 x double> [[TMP14]]) ; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[VEC_CAST26]], align 8 ; CHECK-NEXT: [[VEC_GEP27:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 @@ -91,13 +91,13 @@ ; CHECK-NEXT: [[VEC_CAST39:%.*]] = bitcast double* [[VEC_GEP38]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD40:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST39]], align 8 ; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT42]] +; CHECK-NEXT: [[TMP17:%.*]] = fmul contract <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT42]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = shufflevector <2 x double> [[COL_LOAD37]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> [[TMP17]]) ; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT48]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <1 x double> [[COL_LOAD31]], [[SPLAT_SPLATINSERT48]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD34]], <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 2 ; CHECK-NEXT: [[VEC_CAST54:%.*]] = bitcast double* [[TMP21]] to <1 x double>* ; CHECK-NEXT: store <1 x double> [[TMP18]], <1 x double>* [[VEC_CAST54]], align 8 @@ -113,9 +113,9 @@ ; CHECK-NEXT: [[VEC_CAST64:%.*]] = bitcast double* [[TMP22]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD65:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST64]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[COL_LOAD59]], [[SPLAT_SPLAT68]] +; CHECK-NEXT: [[TMP23:%.*]] = fmul contract <2 x double> [[COL_LOAD59]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD62]], <2 x double> [[SPLAT_SPLAT71]], <2 x double> [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD62]], <2 x double> [[SPLAT_SPLAT71]], <2 x double> [[TMP23]]) ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 6 ; CHECK-NEXT: [[VEC_CAST73:%.*]] = bitcast double* [[TMP25]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP24]], <2 x double>* [[VEC_CAST73]], align 8 @@ -129,16 +129,16 @@ ; CHECK-NEXT: [[VEC_CAST81:%.*]] = bitcast double* [[TMP27]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD82:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST81]], align 8 ; CHECK-NEXT: [[SPLAT_SPLATINSERT84:%.*]] = shufflevector <2 x double> [[COL_LOAD82]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP28:%.*]] = fmul <1 x double> [[COL_LOAD76]], [[SPLAT_SPLATINSERT84]] +; CHECK-NEXT: [[TMP28:%.*]] = fmul contract <1 x double> [[COL_LOAD76]], [[SPLAT_SPLATINSERT84]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT87:%.*]] = shufflevector <2 x double> [[COL_LOAD82]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD79]], <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> [[TMP28]]) +; CHECK-NEXT: [[TMP29:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD79]], <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> [[TMP28]]) ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 8 ; CHECK-NEXT: [[VEC_CAST90:%.*]] = bitcast double* [[TMP30]] to <1 x double>* ; CHECK-NEXT: store <1 x double> [[TMP29]], <1 x double>* [[VEC_CAST90]], align 8 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[TMP31:%.*]] = fadd <3 x double> [[COL_LOAD196]], [[COL_LOAD196]] -; CHECK-NEXT: [[TMP32:%.*]] = fadd <3 x double> [[COL_LOAD199]], [[COL_LOAD199]] +; CHECK-NEXT: [[TMP31:%.*]] = fadd contract <3 x double> [[COL_LOAD196]], [[COL_LOAD196]] +; CHECK-NEXT: [[TMP32:%.*]] = fadd contract <3 x double> [[COL_LOAD199]], [[COL_LOAD199]] ; CHECK-NEXT: [[VEC_CAST213:%.*]] = bitcast <6 x double>* [[A]] to <3 x double>* ; CHECK-NEXT: store <3 x double> [[TMP31]], <3 x double>* [[VEC_CAST213]], align 8 ; CHECK-NEXT: [[VEC_GEP214:%.*]] = getelementptr <6 x double>, <6 x double>* [[A]], i64 0, i64 3 @@ -146,9 +146,9 @@ ; CHECK-NEXT: store <3 x double> [[TMP32]], <3 x double>* [[VEC_CAST215]], align 8 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: false: -; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> [[COL_LOAD201]], [[COL_LOAD201]] -; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[COL_LOAD204]], [[COL_LOAD204]] -; CHECK-NEXT: [[TMP35:%.*]] = fadd <2 x double> [[COL_LOAD207]], [[COL_LOAD207]] +; CHECK-NEXT: [[TMP33:%.*]] = fadd contract <2 x double> [[COL_LOAD201]], [[COL_LOAD201]] +; CHECK-NEXT: [[TMP34:%.*]] = fadd contract <2 x double> [[COL_LOAD204]], [[COL_LOAD204]] +; CHECK-NEXT: [[TMP35:%.*]] = fadd contract <2 x double> [[COL_LOAD207]], [[COL_LOAD207]] ; CHECK-NEXT: [[VEC_CAST208:%.*]] = bitcast <6 x double>* [[B]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP33]], <2 x double>* [[VEC_CAST208]], align 8 ; CHECK-NEXT: [[VEC_GEP209:%.*]] = getelementptr <6 x double>, <6 x double>* [[B]], i64 0, i64 2 @@ -204,13 +204,13 @@ ; CHECK-NEXT: [[VEC_CAST115:%.*]] = bitcast double* [[VEC_GEP114]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD116:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST115]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT119:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT119]] +; CHECK-NEXT: [[TMP48:%.*]] = fmul contract <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT119]] ; CHECK-NEXT: [[SPLAT_SPLAT122:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT122]], <2 x double> [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT122]], <2 x double> [[TMP48]]) ; CHECK-NEXT: [[SPLAT_SPLAT125:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT125]] +; CHECK-NEXT: [[TMP50:%.*]] = fmul contract <2 x double> [[COL_LOAD107]], [[SPLAT_SPLAT125]] ; CHECK-NEXT: [[SPLAT_SPLAT128:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT128]], <2 x double> [[TMP50]]) +; CHECK-NEXT: [[TMP51:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD110]], <2 x double> [[SPLAT_SPLAT128]], <2 x double> [[TMP50]]) ; CHECK-NEXT: [[VEC_CAST130:%.*]] = bitcast <9 x double>* [[C]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP49]], <2 x double>* [[VEC_CAST130]], align 8 ; CHECK-NEXT: [[VEC_GEP131:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 3 @@ -228,13 +228,13 @@ ; CHECK-NEXT: [[VEC_CAST143:%.*]] = bitcast double* [[VEC_GEP142]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD144:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST143]], align 8 ; CHECK-NEXT: [[SPLAT_SPLATINSERT146:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT146]] +; CHECK-NEXT: [[TMP53:%.*]] = fmul contract <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT146]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT149:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT149]], <1 x double> [[TMP53]]) +; CHECK-NEXT: [[TMP54:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT149]], <1 x double> [[TMP53]]) ; CHECK-NEXT: [[SPLAT_SPLATINSERT152:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = fmul <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT152]] +; CHECK-NEXT: [[TMP55:%.*]] = fmul contract <1 x double> [[COL_LOAD135]], [[SPLAT_SPLATINSERT152]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT155:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT155]], <1 x double> [[TMP55]]) +; CHECK-NEXT: [[TMP56:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD138]], <1 x double> [[SPLAT_SPLATINSERT155]], <1 x double> [[TMP55]]) ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 2 ; CHECK-NEXT: [[VEC_CAST158:%.*]] = bitcast double* [[TMP57]] to <1 x double>* ; CHECK-NEXT: store <1 x double> [[TMP54]], <1 x double>* [[VEC_CAST158]], align 8 @@ -250,9 +250,9 @@ ; CHECK-NEXT: [[VEC_CAST168:%.*]] = bitcast double* [[TMP58]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD169:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST168]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT172:%.*]] = shufflevector <2 x double> [[COL_LOAD169]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP59:%.*]] = fmul <2 x double> [[COL_LOAD163]], [[SPLAT_SPLAT172]] +; CHECK-NEXT: [[TMP59:%.*]] = fmul contract <2 x double> [[COL_LOAD163]], [[SPLAT_SPLAT172]] ; CHECK-NEXT: [[SPLAT_SPLAT175:%.*]] = shufflevector <2 x double> [[COL_LOAD169]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD166]], <2 x double> [[SPLAT_SPLAT175]], <2 x double> [[TMP59]]) +; CHECK-NEXT: [[TMP60:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD166]], <2 x double> [[SPLAT_SPLAT175]], <2 x double> [[TMP59]]) ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 6 ; CHECK-NEXT: [[VEC_CAST177:%.*]] = bitcast double* [[TMP61]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP60]], <2 x double>* [[VEC_CAST177]], align 8 @@ -266,9 +266,9 @@ ; CHECK-NEXT: [[VEC_CAST185:%.*]] = bitcast double* [[TMP63]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD186:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST185]], align 8 ; CHECK-NEXT: [[SPLAT_SPLATINSERT188:%.*]] = shufflevector <2 x double> [[COL_LOAD186]], <2 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[COL_LOAD180]], [[SPLAT_SPLATINSERT188]] +; CHECK-NEXT: [[TMP64:%.*]] = fmul contract <1 x double> [[COL_LOAD180]], [[SPLAT_SPLATINSERT188]] ; CHECK-NEXT: [[SPLAT_SPLATINSERT191:%.*]] = shufflevector <2 x double> [[COL_LOAD186]], <2 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD183]], <1 x double> [[SPLAT_SPLATINSERT191]], <1 x double> [[TMP64]]) +; CHECK-NEXT: [[TMP65:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD183]], <1 x double> [[SPLAT_SPLATINSERT191]], <1 x double> [[TMP64]]) ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr <9 x double>, <9 x double>* [[C]], i64 0, i64 8 ; CHECK-NEXT: [[VEC_CAST194:%.*]] = bitcast double* [[TMP66]] to <1 x double>* ; CHECK-NEXT: store <1 x double> [[TMP65]], <1 x double>* [[VEC_CAST194]], align 8 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll @@ -53,12 +53,12 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP11:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> @@ -66,19 +66,19 @@ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 @@ -161,12 +161,12 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP11:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> @@ -174,19 +174,19 @@ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 @@ -268,12 +268,12 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP11:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> @@ -281,19 +281,19 @@ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP3:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 @@ -375,12 +375,12 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) +; CHECK-NEXT: [[TMP11:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]]) ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> @@ -388,19 +388,19 @@ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) +; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]]) ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]]) ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: br label [[INNER_LATCH]] ; CHECK: inner.latch: ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2 -; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: rows.latch: ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll @@ -57,13 +57,13 @@ ; CHECK-NEXT: [[VEC_CAST14:%.*]] = bitcast double* [[VEC_GEP13]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD15:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST14]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT18]], <2 x double> [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT18]], <2 x double> [[TMP12]]) ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[TMP14:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <2 x double> [[COL_LOAD15]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT24]], <2 x double> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD9]], <2 x double> [[SPLAT_SPLAT24]], <2 x double> [[TMP14]]) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP5]], i64 0, i64 8 ; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast double* [[TMP16]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD27:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST26]], align 8 @@ -77,13 +77,13 @@ ; CHECK-NEXT: [[VEC_CAST35:%.*]] = bitcast double* [[VEC_GEP34]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST35]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x double> [[COL_LOAD33]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD27]], <2 x double> [[SPLAT_SPLAT40]], <2 x double> [[TMP13]]) +; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD27]], <2 x double> [[SPLAT_SPLAT40]], <2 x double> [[TMP13]]) ; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <2 x double> [[COL_LOAD33]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD30]], <2 x double> [[SPLAT_SPLAT43]], <2 x double> [[TMP18]]) +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD30]], <2 x double> [[SPLAT_SPLAT43]], <2 x double> [[TMP18]]) ; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x double> [[COL_LOAD36]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD27]], <2 x double> [[SPLAT_SPLAT47]], <2 x double> [[TMP15]]) +; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD27]], <2 x double> [[SPLAT_SPLAT47]], <2 x double> [[TMP15]]) ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <2 x double> [[COL_LOAD36]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD30]], <2 x double> [[SPLAT_SPLAT50]], <2 x double> [[TMP20]]) +; CHECK-NEXT: [[TMP21:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD30]], <2 x double> [[SPLAT_SPLAT50]], <2 x double> [[TMP20]]) ; CHECK-NEXT: [[VEC_CAST52:%.*]] = bitcast <16 x double>* [[C]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP19]], <2 x double>* [[VEC_CAST52]], align 8 ; CHECK-NEXT: [[VEC_GEP53:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 4 @@ -101,13 +101,13 @@ ; CHECK-NEXT: [[VEC_CAST65:%.*]] = bitcast double* [[VEC_GEP64]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD66:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST65]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT69:%.*]] = shufflevector <2 x double> [[COL_LOAD63]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[COL_LOAD57]], [[SPLAT_SPLAT69]] +; CHECK-NEXT: [[TMP23:%.*]] = fmul contract <2 x double> [[COL_LOAD57]], [[SPLAT_SPLAT69]] ; CHECK-NEXT: [[SPLAT_SPLAT72:%.*]] = shufflevector <2 x double> [[COL_LOAD63]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT72]], <2 x double> [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT72]], <2 x double> [[TMP23]]) ; CHECK-NEXT: [[SPLAT_SPLAT75:%.*]] = shufflevector <2 x double> [[COL_LOAD66]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[COL_LOAD57]], [[SPLAT_SPLAT75]] +; CHECK-NEXT: [[TMP25:%.*]] = fmul contract <2 x double> [[COL_LOAD57]], [[SPLAT_SPLAT75]] ; CHECK-NEXT: [[SPLAT_SPLAT78:%.*]] = shufflevector <2 x double> [[COL_LOAD66]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT78]], <2 x double> [[TMP25]]) +; CHECK-NEXT: [[TMP26:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT78]], <2 x double> [[TMP25]]) ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP5]], i64 0, i64 10 ; CHECK-NEXT: [[VEC_CAST80:%.*]] = bitcast double* [[TMP27]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD81:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST80]], align 8 @@ -121,13 +121,13 @@ ; CHECK-NEXT: [[VEC_CAST89:%.*]] = bitcast double* [[VEC_GEP88]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD90:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST89]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT94:%.*]] = shufflevector <2 x double> [[COL_LOAD87]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD81]], <2 x double> [[SPLAT_SPLAT94]], <2 x double> [[TMP24]]) +; CHECK-NEXT: [[TMP29:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD81]], <2 x double> [[SPLAT_SPLAT94]], <2 x double> [[TMP24]]) ; CHECK-NEXT: [[SPLAT_SPLAT97:%.*]] = shufflevector <2 x double> [[COL_LOAD87]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD84]], <2 x double> [[SPLAT_SPLAT97]], <2 x double> [[TMP29]]) +; CHECK-NEXT: [[TMP30:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD84]], <2 x double> [[SPLAT_SPLAT97]], <2 x double> [[TMP29]]) ; CHECK-NEXT: [[SPLAT_SPLAT101:%.*]] = shufflevector <2 x double> [[COL_LOAD90]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD81]], <2 x double> [[SPLAT_SPLAT101]], <2 x double> [[TMP26]]) +; CHECK-NEXT: [[TMP31:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD81]], <2 x double> [[SPLAT_SPLAT101]], <2 x double> [[TMP26]]) ; CHECK-NEXT: [[SPLAT_SPLAT104:%.*]] = shufflevector <2 x double> [[COL_LOAD90]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP32:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD84]], <2 x double> [[SPLAT_SPLAT104]], <2 x double> [[TMP31]]) +; CHECK-NEXT: [[TMP32:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD84]], <2 x double> [[SPLAT_SPLAT104]], <2 x double> [[TMP31]]) ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 2 ; CHECK-NEXT: [[VEC_CAST106:%.*]] = bitcast double* [[TMP33]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP30]], <2 x double>* [[VEC_CAST106]], align 8 @@ -146,13 +146,13 @@ ; CHECK-NEXT: [[VEC_CAST119:%.*]] = bitcast double* [[VEC_GEP118]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD120:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST119]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT123:%.*]] = shufflevector <2 x double> [[COL_LOAD117]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x double> [[COL_LOAD111]], [[SPLAT_SPLAT123]] +; CHECK-NEXT: [[TMP35:%.*]] = fmul contract <2 x double> [[COL_LOAD111]], [[SPLAT_SPLAT123]] ; CHECK-NEXT: [[SPLAT_SPLAT126:%.*]] = shufflevector <2 x double> [[COL_LOAD117]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD114]], <2 x double> [[SPLAT_SPLAT126]], <2 x double> [[TMP35]]) +; CHECK-NEXT: [[TMP36:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD114]], <2 x double> [[SPLAT_SPLAT126]], <2 x double> [[TMP35]]) ; CHECK-NEXT: [[SPLAT_SPLAT129:%.*]] = shufflevector <2 x double> [[COL_LOAD120]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x double> [[COL_LOAD111]], [[SPLAT_SPLAT129]] +; CHECK-NEXT: [[TMP37:%.*]] = fmul contract <2 x double> [[COL_LOAD111]], [[SPLAT_SPLAT129]] ; CHECK-NEXT: [[SPLAT_SPLAT132:%.*]] = shufflevector <2 x double> [[COL_LOAD120]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD114]], <2 x double> [[SPLAT_SPLAT132]], <2 x double> [[TMP37]]) +; CHECK-NEXT: [[TMP38:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD114]], <2 x double> [[SPLAT_SPLAT132]], <2 x double> [[TMP37]]) ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP5]], i64 0, i64 8 ; CHECK-NEXT: [[VEC_CAST134:%.*]] = bitcast double* [[TMP39]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD135:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST134]], align 8 @@ -166,13 +166,13 @@ ; CHECK-NEXT: [[VEC_CAST143:%.*]] = bitcast double* [[VEC_GEP142]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD144:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST143]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT148:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD135]], <2 x double> [[SPLAT_SPLAT148]], <2 x double> [[TMP36]]) +; CHECK-NEXT: [[TMP41:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD135]], <2 x double> [[SPLAT_SPLAT148]], <2 x double> [[TMP36]]) ; CHECK-NEXT: [[SPLAT_SPLAT151:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP42:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT151]], <2 x double> [[TMP41]]) +; CHECK-NEXT: [[TMP42:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT151]], <2 x double> [[TMP41]]) ; CHECK-NEXT: [[SPLAT_SPLAT155:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP43:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD135]], <2 x double> [[SPLAT_SPLAT155]], <2 x double> [[TMP38]]) +; CHECK-NEXT: [[TMP43:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD135]], <2 x double> [[SPLAT_SPLAT155]], <2 x double> [[TMP38]]) ; CHECK-NEXT: [[SPLAT_SPLAT158:%.*]] = shufflevector <2 x double> [[COL_LOAD144]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT158]], <2 x double> [[TMP43]]) +; CHECK-NEXT: [[TMP44:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT158]], <2 x double> [[TMP43]]) ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 8 ; CHECK-NEXT: [[VEC_CAST160:%.*]] = bitcast double* [[TMP45]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP42]], <2 x double>* [[VEC_CAST160]], align 8 @@ -192,13 +192,13 @@ ; CHECK-NEXT: [[VEC_CAST173:%.*]] = bitcast double* [[VEC_GEP172]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD174:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST173]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT177:%.*]] = shufflevector <2 x double> [[COL_LOAD171]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[COL_LOAD165]], [[SPLAT_SPLAT177]] +; CHECK-NEXT: [[TMP48:%.*]] = fmul contract <2 x double> [[COL_LOAD165]], [[SPLAT_SPLAT177]] ; CHECK-NEXT: [[SPLAT_SPLAT180:%.*]] = shufflevector <2 x double> [[COL_LOAD171]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD168]], <2 x double> [[SPLAT_SPLAT180]], <2 x double> [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD168]], <2 x double> [[SPLAT_SPLAT180]], <2 x double> [[TMP48]]) ; CHECK-NEXT: [[SPLAT_SPLAT183:%.*]] = shufflevector <2 x double> [[COL_LOAD174]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[COL_LOAD165]], [[SPLAT_SPLAT183]] +; CHECK-NEXT: [[TMP50:%.*]] = fmul contract <2 x double> [[COL_LOAD165]], [[SPLAT_SPLAT183]] ; CHECK-NEXT: [[SPLAT_SPLAT186:%.*]] = shufflevector <2 x double> [[COL_LOAD174]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD168]], <2 x double> [[SPLAT_SPLAT186]], <2 x double> [[TMP50]]) +; CHECK-NEXT: [[TMP51:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD168]], <2 x double> [[SPLAT_SPLAT186]], <2 x double> [[TMP50]]) ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr <16 x double>, <16 x double>* [[TMP5]], i64 0, i64 10 ; CHECK-NEXT: [[VEC_CAST188:%.*]] = bitcast double* [[TMP52]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD189:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST188]], align 8 @@ -212,13 +212,13 @@ ; CHECK-NEXT: [[VEC_CAST197:%.*]] = bitcast double* [[VEC_GEP196]] to <2 x double>* ; CHECK-NEXT: [[COL_LOAD198:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST197]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT202:%.*]] = shufflevector <2 x double> [[COL_LOAD195]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP54:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD189]], <2 x double> [[SPLAT_SPLAT202]], <2 x double> [[TMP49]]) +; CHECK-NEXT: [[TMP54:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD189]], <2 x double> [[SPLAT_SPLAT202]], <2 x double> [[TMP49]]) ; CHECK-NEXT: [[SPLAT_SPLAT205:%.*]] = shufflevector <2 x double> [[COL_LOAD195]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP55:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD192]], <2 x double> [[SPLAT_SPLAT205]], <2 x double> [[TMP54]]) +; CHECK-NEXT: [[TMP55:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD192]], <2 x double> [[SPLAT_SPLAT205]], <2 x double> [[TMP54]]) ; CHECK-NEXT: [[SPLAT_SPLAT209:%.*]] = shufflevector <2 x double> [[COL_LOAD198]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP56:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD189]], <2 x double> [[SPLAT_SPLAT209]], <2 x double> [[TMP51]]) +; CHECK-NEXT: [[TMP56:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD189]], <2 x double> [[SPLAT_SPLAT209]], <2 x double> [[TMP51]]) ; CHECK-NEXT: [[SPLAT_SPLAT212:%.*]] = shufflevector <2 x double> [[COL_LOAD198]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD192]], <2 x double> [[SPLAT_SPLAT212]], <2 x double> [[TMP56]]) +; CHECK-NEXT: [[TMP57:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD192]], <2 x double> [[SPLAT_SPLAT212]], <2 x double> [[TMP56]]) ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 10 ; CHECK-NEXT: [[VEC_CAST214:%.*]] = bitcast double* [[TMP58]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP55]], <2 x double>* [[VEC_CAST214]], align 8 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-minimal.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-minimal.ll --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-minimal.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-minimal.ll @@ -31,21 +31,21 @@ ; CHECK-NEXT: [[VEC_CAST12:%.*]] = bitcast double* [[VEC_GEP11]] to <4 x double>* ; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <4 x double>, <4 x double>* [[VEC_CAST12]], align 8 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[COL_LOAD10]], <4 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <4 x double> [[COL_LOAD10]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP0]]) ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <4 x double> [[COL_LOAD10]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD5]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD5]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP1]]) ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <4 x double> [[COL_LOAD10]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT22]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT22]], <2 x double> [[TMP2]]) ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <4 x double> [[COL_LOAD13]], <4 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT25]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x double> [[COL_LOAD13]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT28]], <2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT28]], <2 x double> [[TMP4]]) ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <4 x double> [[COL_LOAD13]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD5]], <2 x double> [[SPLAT_SPLAT31]], <2 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD5]], <2 x double> [[SPLAT_SPLAT31]], <2 x double> [[TMP5]]) ; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x double> [[COL_LOAD13]], <4 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT34]], <2 x double> [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT34]], <2 x double> [[TMP6]]) ; CHECK-NEXT: [[VEC_CAST35:%.*]] = bitcast <4 x double>* [[C:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST35]], align 8 ; CHECK-NEXT: [[VEC_GEP36:%.*]] = getelementptr <4 x double>, <4 x double>* [[C]], i64 0, i64 2 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s + +; Function Attrs: nofree nounwind uwtable willreturn mustprogress +define <4 x float> @preserve_fmf(<4 x float> %m, <4 x float> %n, float %x, float %y) { +; CHECK-LABEL: @preserve_fmf( +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <4 x float> [[M:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <4 x float> [[M]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <4 x float> [[N:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]] +; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP11]], <2 x i32> +; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP13]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]] +; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP14]]) +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP17]], <2 x i32> +; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP19]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = fmul fast <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]] +; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP20]]) +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP18]], <2 x float> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[SPLIT25:%.*]] = shufflevector <4 x float> [[M]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[SPLIT26:%.*]] = shufflevector <4 x float> [[M]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = fadd fast <2 x float> [[TMP12]], [[SPLIT25]] +; CHECK-NEXT: [[TMP26:%.*]] = fadd fast <2 x float> [[TMP24]], [[SPLIT26]] +; CHECK-NEXT: [[SPLIT27:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[SPLIT28:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = fsub reassoc <2 x float> [[TMP25]], [[SPLIT27]] +; CHECK-NEXT: [[TMP28:%.*]] = fsub reassoc <2 x float> [[TMP26]], [[SPLIT28]] +; CHECK-NEXT: [[TMP29:%.*]] = fneg contract <2 x float> [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = fneg contract <2 x float> [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = fmul reassoc contract <2 x float> [[TMP27]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = fmul reassoc contract <2 x float> [[TMP28]], [[TMP30]] +; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[TMP31]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP33]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = fmul reassoc <1 x float> [[BLOCK29]], [[SPLAT_SPLAT31]] +; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <2 x float> [[TMP30]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[TMP31]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> poison, float [[TMP35]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT33]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = fmul reassoc <1 x float> [[BLOCK32]], [[SPLAT_SPLAT34]] +; CHECK-NEXT: [[TMP37:%.*]] = fadd reassoc <1 x float> [[TMP34]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <1 x float> [[TMP37]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP38]], <2 x i32> +; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[TMP31]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> poison, float [[TMP40]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = fmul reassoc <1 x float> [[BLOCK35]], [[SPLAT_SPLAT37]] +; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <2 x float> [[TMP30]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[TMP31]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> poison, float [[TMP42]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT39]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = fmul reassoc <1 x float> [[BLOCK38]], [[SPLAT_SPLAT40]] +; CHECK-NEXT: [[TMP44:%.*]] = fadd reassoc <1 x float> [[TMP41]], [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <1 x float> [[TMP44]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x float> [[TMP39]], <2 x float> [[TMP45]], <2 x i32> +; CHECK-NEXT: [[BLOCK41:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[TMP32]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> poison, float [[TMP47]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP48:%.*]] = fmul reassoc <1 x float> [[BLOCK41]], [[SPLAT_SPLAT43]] +; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <2 x float> [[TMP30]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x float> [[TMP32]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> poison, float [[TMP49]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT45]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = fmul reassoc <1 x float> [[BLOCK44]], [[SPLAT_SPLAT46]] +; CHECK-NEXT: [[TMP51:%.*]] = fadd reassoc <1 x float> [[TMP48]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <1 x float> [[TMP51]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP52]], <2 x i32> +; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x float> [[TMP32]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> poison, float [[TMP54]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP55:%.*]] = fmul reassoc <1 x float> [[BLOCK47]], [[SPLAT_SPLAT49]] +; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <2 x float> [[TMP30]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x float> [[TMP32]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> poison, float [[TMP56]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT51]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = fmul reassoc <1 x float> [[BLOCK50]], [[SPLAT_SPLAT52]] +; CHECK-NEXT: [[TMP58:%.*]] = fadd reassoc <1 x float> [[TMP55]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <1 x float> [[TMP58]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <2 x float> [[TMP53]], <2 x float> [[TMP59]], <2 x i32> +; CHECK-NEXT: [[BLOCK53:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <2 x float> [[TMP46]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> poison, float [[TMP61]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP62:%.*]] = fmul reassoc contract <1 x float> [[BLOCK53]], [[SPLAT_SPLAT55]] +; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <2 x float> [[TMP32]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <2 x float> [[TMP46]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x float> poison, float [[TMP63]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT57]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP64:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK56]], <1 x float> [[SPLAT_SPLAT58]], <1 x float> [[TMP62]]) +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <1 x float> [[TMP64]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP65]], <2 x i32> +; CHECK-NEXT: [[BLOCK59:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <2 x float> [[TMP46]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x float> poison, float [[TMP67]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT60]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP68:%.*]] = fmul reassoc contract <1 x float> [[BLOCK59]], [[SPLAT_SPLAT61]] +; CHECK-NEXT: [[BLOCK62:%.*]] = shufflevector <2 x float> [[TMP32]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <2 x float> [[TMP46]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x float> poison, float [[TMP69]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT63]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP70:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK62]], <1 x float> [[SPLAT_SPLAT64]], <1 x float> [[TMP68]]) +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <1 x float> [[TMP70]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <2 x float> [[TMP66]], <2 x float> [[TMP71]], <2 x i32> +; CHECK-NEXT: [[BLOCK65:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <2 x float> [[TMP60]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x float> poison, float [[TMP73]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT66]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = fmul reassoc contract <1 x float> [[BLOCK65]], [[SPLAT_SPLAT67]] +; CHECK-NEXT: [[BLOCK68:%.*]] = shufflevector <2 x float> [[TMP32]], <2 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x float> [[TMP60]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x float> poison, float [[TMP75]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT69]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP76:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK68]], <1 x float> [[SPLAT_SPLAT70]], <1 x float> [[TMP74]]) +; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <1 x float> [[TMP76]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP77]], <2 x i32> +; CHECK-NEXT: [[BLOCK71:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <2 x float> [[TMP60]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x float> poison, float [[TMP79]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT72]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP80:%.*]] = fmul reassoc contract <1 x float> [[BLOCK71]], [[SPLAT_SPLAT73]] +; CHECK-NEXT: [[BLOCK74:%.*]] = shufflevector <2 x float> [[TMP32]], <2 x float> poison, <1 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x float> [[TMP60]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x float> poison, float [[TMP81]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT75]], <1 x float> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP82:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK74]], <1 x float> [[SPLAT_SPLAT76]], <1 x float> [[TMP80]]) +; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <1 x float> [[TMP82]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <2 x float> [[TMP78]], <2 x float> [[TMP83]], <2 x i32> +; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x float> [[TMP72]], <2 x float> [[TMP84]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP85]] + %res = tail call fast <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %m, <4 x float> %n, i32 2, i32 2, i32 2) + %res.2 = fadd fast <4 x float> %res, %m + %res.3 = fsub reassoc <4 x float> %res.2, %n + %res.4 = fneg contract <4 x float> %res.3 + %res.5 = fmul reassoc contract <4 x float> %res.3, %res.4 + %res.6 = tail call reassoc <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %res.4, <4 x float> %res.5, i32 2, i32 2, i32 2) + %res.7 = tail call contract reassoc <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %res.5, <4 x float> %res.6, i32 2, i32 2, i32 2) + ret <4 x float> %res.7 +} + +declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32)