diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1940,6 +1940,8 @@ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 4 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 1 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 1 }, @@ -2015,6 +2017,8 @@ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb { ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -81,8 +81,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -100,8 +100,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -214,8 +214,8 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -315,8 +315,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -334,8 +334,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) @@ -448,8 +448,8 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1037,8 +1037,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1056,8 +1056,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1170,8 +1170,8 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1275,8 +1275,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1294,8 +1294,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) @@ -1408,8 +1408,8 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -167,8 +167,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %G = trunc <8 x i64> undef to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'zext_sext' @@ -197,8 +197,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'zext_sext' diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll --- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -218,8 +218,8 @@ ; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = trunc <8 x i64> undef to <8 x i32> ; VEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %B = trunc <8 x i64> undef to <8 x i16> ; VEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C = trunc <8 x i64> undef to <8 x i8> -; VEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D = trunc <16 x i32> undef to <16 x i16> -; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = trunc <16 x i32> undef to <16 x i8> ; VEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <32 x i16> undef to <32 x i8> ; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -117,8 +117,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi16' @@ -132,8 +132,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'trunc_vXi16' @@ -162,8 +162,8 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %i64 = trunc i64 undef to i16 @@ -267,9 +267,9 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> @@ -291,9 +291,9 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> @@ -363,9 +363,9 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -237,6 +237,13 @@ void print(raw_ostream &os) const; void dump() const; + /// Check if the specified inequality already holds in the polytope. + bool isRedundant(ArrayRef coeffs); + + /// Check if the specified FAC contains the polytope, + /// i.e. whether all inequalities of the specified FAC hold for the polytope. + bool containedIn(const FlatAffineConstraints &fac); + private: friend class GBRSimplex; diff --git a/mlir/include/mlir/Analysis/PresburgerSet.h b/mlir/include/mlir/Analysis/PresburgerSet.h --- a/mlir/include/mlir/Analysis/PresburgerSet.h +++ b/mlir/include/mlir/Analysis/PresburgerSet.h @@ -94,6 +94,12 @@ /// any of the FACs in the union are unbounded. bool findIntegerSample(SmallVectorImpl &sample); + /// Simplifies the representation of a PresburgerSet. + /// + /// Inparticular, removes all FACs which are subset of other FACs in the + /// union. + PresburgerSet coalesce(); + private: /// Construct an empty PresburgerSet. PresburgerSet(unsigned nDim = 0, unsigned nSym = 0) diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -1240,4 +1240,25 @@ void Simplex::dump() const { print(llvm::errs()); } +bool Simplex::containedIn(const FlatAffineConstraints &fac) { + + if (isEmpty()) + return true; + + for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) + if (!isRedundant(fac.getInequality(i))) + return false; + + for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) + if (!isRedundant(fac.getEquality(i))) + return false; + + return true; +} + +bool Simplex::isRedundant(ArrayRef coeffs) { + Optional minimum = computeOptimum(Direction::Down, coeffs); + return minimum && *minimum >= Fraction(0, 1); +} + } // namespace mlir diff --git a/mlir/lib/Analysis/PresburgerSet.cpp b/mlir/lib/Analysis/PresburgerSet.cpp --- a/mlir/lib/Analysis/PresburgerSet.cpp +++ b/mlir/lib/Analysis/PresburgerSet.cpp @@ -381,6 +381,39 @@ return false; } +PresburgerSet PresburgerSet::coalesce() { + PresburgerSet newSet = + PresburgerSet::getEmptySet(this->getNumDims(), this->getNumSyms()); + SmallVector marked(this->getNumFACs()); + + for (unsigned i = 0, e = flatAffineConstraints.size(); i < e; i++) { + if (marked[i]) + continue; + Simplex simplex(flatAffineConstraints[i]); + + if (simplex.isEmpty()) { + marked[i] = true; + continue; + } + // check if bs1 is contained in any basicSet + for (unsigned j = 0, e = flatAffineConstraints.size(); j < e; ++j) { + if (j == i || marked[j]) + continue; + + if (simplex.containedIn(flatAffineConstraints[j])) { + marked[i] = true; + break; + } + } + } + + for (unsigned i = 0, e = flatAffineConstraints.size(); i < e; ++i) { + if (!marked[i]) + newSet.unionFACInPlace(flatAffineConstraints[i]); + } + return newSet; +} + void PresburgerSet::print(raw_ostream &os) const { os << getNumFACs() << " FlatAffineConstraints:\n"; for (const FlatAffineConstraints &fac : flatAffineConstraints) { diff --git a/mlir/unittests/Analysis/FACUtils.h b/mlir/unittests/Analysis/FACUtils.h new file mode 100644 --- /dev/null +++ b/mlir/unittests/Analysis/FACUtils.h @@ -0,0 +1,41 @@ +//===- FACUtils.h - utility functions for testing -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/AffineStructures.h" + +namespace mlir { + +/// Construct a FlatAffineConstraints from a set of inequality and +/// equality constraints. `numIds` is the total number of ids, of which +/// `numLocals` is the number of local ids. +static FlatAffineConstraints +makeFACFromConstraints(unsigned numIds, ArrayRef> ineqs, + ArrayRef> eqs, + unsigned numLocals = 0) { + FlatAffineConstraints fac(/*numReservedInequalities=*/ineqs.size(), + /*numReservedEqualities=*/eqs.size(), + /*numReservedCols=*/numIds + 1, + /*numDims=*/numIds - numLocals, + /*numSymbols=*/0, numLocals); + for (const SmallVector &eq : eqs) + fac.addEquality(eq); + for (const SmallVector &ineq : ineqs) + fac.addInequality(ineq); + return fac; +} + +/// Construct a FlatAffineConstraints having `numDims` dimensions from the given +/// set of inequality constraints. This is a convenience function to be used +/// when the FAC to be constructed does not have any local ids and does not have +/// equalties. +static FlatAffineConstraints +makeFACFromIneqs(unsigned dims, ArrayRef> ineqs) { + return makeFACFromConstraints(dims, ineqs, {}); +} + +} // namespace mlir diff --git a/mlir/unittests/Analysis/Presburger/CMakeLists.txt b/mlir/unittests/Analysis/Presburger/CMakeLists.txt --- a/mlir/unittests/Analysis/Presburger/CMakeLists.txt +++ b/mlir/unittests/Analysis/Presburger/CMakeLists.txt @@ -4,5 +4,5 @@ ) target_link_libraries(MLIRPresburgerTests - PRIVATE MLIRPresburger) - + PRIVATE MLIRPresburger + MLIRLoopAnalysis) diff --git a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp --- a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp +++ b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Analysis/Presburger/Simplex.h" +#include "../FACUtils.h" #include #include @@ -410,4 +411,62 @@ EXPECT_EQ(simplex.getNumConstraints(), 0u); } +TEST(SimplexTest, isRedundant) { + Simplex simplex(2); + simplex.addInequality({0, -1, 2}); // [0]: y <= 2. + simplex.addInequality({1, 0, 0}); // [1]: x >= 0. + simplex.addEquality({-1, 1, 0}); // [2]: y = x. + + EXPECT_TRUE(simplex.isRedundant({-1, 0, 2})); // x <= 2. + EXPECT_TRUE(simplex.isRedundant({0, 1, 0})); // y >= 0. + + EXPECT_FALSE(simplex.isRedundant({-1, 0, -1})); // x <= -1. + EXPECT_FALSE(simplex.isRedundant({0, 1, -2})); // y >= 2. + EXPECT_FALSE(simplex.isRedundant({0, 1, -1})); // y >= 1. +} + +TEST(SimplexTest, ContainedIn) { + + FlatAffineConstraints univ = FlatAffineConstraints::getUniverse(1, 0); + + FlatAffineConstraints empty = makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}); // x <= -1. + + FlatAffineConstraints s1 = makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 4}}); // x <= 4. + + FlatAffineConstraints s2 = makeFACFromIneqs(1, {{1, -1}, // x >= 1. + {-1, 3}}); // x <= 3. + + Simplex simUniv(univ); + Simplex simEmpty(empty); + Simplex sim1(s1); + Simplex sim2(s2); + + EXPECT_TRUE(simUniv.containedIn(univ)); + EXPECT_TRUE(simEmpty.containedIn(empty)); + EXPECT_TRUE(sim1.containedIn(s1)); + EXPECT_TRUE(sim2.containedIn(s2)); + + EXPECT_TRUE(simEmpty.containedIn(univ)); + EXPECT_TRUE(simEmpty.containedIn(s1)); + EXPECT_TRUE(simEmpty.containedIn(s2)); + EXPECT_TRUE(simEmpty.containedIn(empty)); + + EXPECT_TRUE(simUniv.containedIn(univ)); + EXPECT_FALSE(simUniv.containedIn(s1)); + EXPECT_FALSE(simUniv.containedIn(s2)); + EXPECT_FALSE(simUniv.containedIn(empty)); + + EXPECT_TRUE(sim1.containedIn(univ)); + EXPECT_TRUE(sim1.containedIn(s1)); + EXPECT_FALSE(sim1.containedIn(s2)); + EXPECT_FALSE(sim1.containedIn(empty)); + + EXPECT_TRUE(sim2.containedIn(univ)); + EXPECT_TRUE(sim2.containedIn(s1)); + EXPECT_TRUE(sim2.containedIn(s2)); + EXPECT_FALSE(sim2.containedIn(empty)); +} + } // namespace mlir diff --git a/mlir/unittests/Analysis/PresburgerSetTest.cpp b/mlir/unittests/Analysis/PresburgerSetTest.cpp --- a/mlir/unittests/Analysis/PresburgerSetTest.cpp +++ b/mlir/unittests/Analysis/PresburgerSetTest.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Analysis/PresburgerSet.h" +#include "./FACUtils.h" #include #include @@ -79,34 +80,6 @@ } } -/// Construct a FlatAffineConstraints from a set of inequality and -/// equality constraints. `numIds` is the total number of ids, of which -/// `numLocals` is the number of local ids. -static FlatAffineConstraints -makeFACFromConstraints(unsigned numIds, ArrayRef> ineqs, - ArrayRef> eqs, - unsigned numLocals = 0) { - FlatAffineConstraints fac(/*numReservedInequalities=*/ineqs.size(), - /*numReservedEqualities=*/eqs.size(), - /*numReservedCols=*/numIds + 1, - /*numDims=*/numIds - numLocals, - /*numSymbols=*/0, numLocals); - for (const SmallVector &eq : eqs) - fac.addEquality(eq); - for (const SmallVector &ineq : ineqs) - fac.addInequality(ineq); - return fac; -} - -/// Construct a FlatAffineConstraints having `numDims` dimensions from the given -/// set of inequality constraints. This is a convenience function to be used -/// when the FAC to be constructed does not have any local ids and does not have -/// equalties. -static FlatAffineConstraints -makeFACFromIneqs(unsigned numDims, ArrayRef> ineqs) { - return makeFACFromConstraints(numDims, ineqs, /*eqs=*/{}); -} - /// Construct a PresburgerSet having `numDims` dimensions and no symbols from /// the given list of FlatAffineConstraints. Each FAC in `facs` should also have /// `numDims` dimensions and no symbols, although it can have any number of @@ -638,4 +611,173 @@ expectEqual(multiples3.intersect(evens), multiples6); } +void expectCoalesce(size_t expectedNumBasicSets, PresburgerSet set) { + PresburgerSet newSet = set.coalesce(); + if (!set.isEqual(newSet) || !(expectedNumBasicSets == newSet.getNumFACs())) { + set.dump(); + newSet.dump(); + } + EXPECT_TRUE(set.isEqual(newSet)); + EXPECT_TRUE(expectedNumBasicSets == newSet.getNumFACs()); +} + +TEST(SetTest, coalesceContainedOneDim) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 4}}), // x <= 4. + makeFACFromIneqs(1, {{1, -1}, // x >= 1. + {-1, 2}}), // x <= 2. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceFirstEmpty) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}), // x <= -1. + makeFACFromIneqs(1, {{1, -1}, // x >= 1. + {-1, 2}}), // x <= 2. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceSecondEmpty) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, -1}, // x >= 1. + {-1, 2}}), // x <= 2. + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}), // x <= -1. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceBothEmpty) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, -3}, // x >= 3. + {-1, -1}}), // x <= -1. + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}), // x <= -1. + }); + expectCoalesce(0, set); +} + +TEST(SetTest, coalesceFirstUniv) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {}), + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 1}}), // x <= 1. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceSecondUniv) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 1}}), // x <= 1. + makeFACFromIneqs(1, {}), + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceBothUniv) { + PresburgerSet set = makeSetFromFACs(1, { + makeFACFromIneqs(1, {}), + makeFACFromIneqs(1, {}), + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceFirstUnivSecondEmpty) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {}), + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}), // x <= -1. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceFirstEmptySecondUniv) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {}), + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, -1}}), // x <= -1. + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceCutOneDim) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 3}}), // x <= 3. + makeFACFromIneqs(1, {{1, -2}, // x >= 2. + {-1, 4}}), // x <= 4. + }); + expectCoalesce(2, set); +} + +TEST(SetTest, coalesceSeparateOneDim) { + PresburgerSet set = + makeSetFromFACs(1, { + makeFACFromIneqs(1, {{1, 0}, // x >= 0. + {-1, 2}}), // x <= 2. + makeFACFromIneqs(1, {{1, -3}, // x >= 3. + {-1, 4}}), // x <= 4. + }); + expectCoalesce(2, set); +} + +TEST(SetTest, coalesceContainedTwoDim) { + PresburgerSet set = + makeSetFromFACs(2, { + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, 0}, // y >= 0 + {0, -1, 3}}), // y <= 3. + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, -2}, // y >= 2. + {0, -1, 3}}), // y <= 3 + }); + expectCoalesce(1, set); +} + +TEST(SetTest, coalesceCutTwoDim) { + PresburgerSet set = + makeSetFromFACs(2, { + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, 0}, // y >= 0 + {0, -1, 2}}), // y <= 2. + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, -1}, // y >= 1. + {0, -1, 3}}), // y <= 3 + }); + expectCoalesce(2, set); +} + +TEST(SetTest, coalesceSeparateTwoDim) { + PresburgerSet set = + makeSetFromFACs(2, { + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, 0}, // y >= 0 + {0, -1, 1}}), // y <= 1. + makeFACFromIneqs(2, {{1, 0, 0}, // x >= 0. + {-1, 0, 3}, // x <= 3. + {0, 1, -2}, // y >= 2. + {0, -1, 3}}), // y <= 3 + }); + expectCoalesce(2, set); +} + } // namespace mlir