Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -194,12 +194,23 @@ /// the number of vector elements or scalar bitwidth. The intent is a /// G_MERGE_VALUES can be constructed from \p Ty0 elements, and unmerged into /// \p Ty1. +LLVM_READNONE LLT getLCMType(LLT Ty0, LLT Ty1); -/// Return a type that is greatest common divisor of \p OrigTy and \p -/// TargetTy. This will either change the number of vector elements, or -/// bitwidth of scalars. The intent is the result type can be used as the -/// result of a G_UNMERGE_VALUES from \p OrigTy. +/// Return a type where the total size is the greatest common divisor of \p +/// OrigTy and \p TargetTy. This will try to either change the number of vector +/// elements, or bitwidth of scalars. The intent is the result type can be used +/// as the result of a G_UNMERGE_VALUES from \p OrigTy, and then some +/// combination of G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS (possibly +/// with intermediate casts) can re-form \p TargetTy. +/// +/// If these are vectors with different element types, this will try to produce +/// a vector with a compatible total size, but the element type of \p OrigTy. If +/// this can't be satisfied, this will produce a scalar smaller than the +/// original vector elements. +/// +/// In the worst case, this returns LLT::scalar(1) +LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy); } // End namespace llvm. Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -252,7 +252,7 @@ LLT NarrowTy, Register SrcReg) { LLT SrcTy = MRI.getType(SrcReg); - LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy)); + LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); if (SrcTy == GCDTy) { // If the source already evenly divides the result type, we don't need to do // anything. Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -542,22 +542,45 @@ } LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { - if (OrigTy.isVector() && TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy.getElementType()); - int GCD = greatestCommonDivisor(OrigTy.getNumElements(), - TargetTy.getNumElements()); - return LLT::scalarOrVector(GCD, OrigTy.getElementType()); - } + const unsigned OrigSize = OrigTy.getSizeInBits(); + const unsigned TargetSize = TargetTy.getSizeInBits(); + + if (OrigSize == TargetSize) + return OrigTy; + + if (OrigTy.isVector()) { + LLT OrigElt = OrigTy.getElementType(); + if (TargetTy.isVector()) { + LLT TargetElt = TargetTy.getElementType(); + if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) { + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigElt); + } + } else { + // If the source is a vector of pointers, return a pointer element. + if (OrigElt.getSizeInBits() == TargetSize) + return OrigElt; + } + + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); + if (GCD == OrigElt.getSizeInBits()) + return OrigElt; - if (OrigTy.isVector() && !TargetTy.isVector()) { - assert(OrigTy.getElementType() == TargetTy); - return TargetTy; + // If we can't produce the original element type, we have to use a smaller + // scalar. + if (GCD < OrigElt.getSizeInBits()) + return LLT::scalar(GCD); + return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt); } - assert(!OrigTy.isVector() && !TargetTy.isVector() && - "GCD type of vector and scalar not implemented"); + if (TargetTy.isVector()) { + // Try to preserve the original element type. + LLT TargetElt = TargetTy.getElementType(); + if (TargetElt.getSizeInBits() == OrigSize) + return OrigTy; + } - int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), - TargetTy.getSizeInBits()); + unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); return LLT::scalar(GCD); } Index: llvm/unittests/CodeGen/GlobalISel/GISelUtilsTest.cpp =================================================================== --- llvm/unittests/CodeGen/GlobalISel/GISelUtilsTest.cpp +++ llvm/unittests/CodeGen/GlobalISel/GISelUtilsTest.cpp @@ -13,13 +13,18 @@ namespace { static const LLT S1 = LLT::scalar(1); +static const LLT S8 = LLT::scalar(8); static const LLT S16 = LLT::scalar(16); static const LLT S32 = LLT::scalar(32); static const LLT S64 = LLT::scalar(64); static const LLT P0 = LLT::pointer(0, 64); static const LLT P1 = LLT::pointer(1, 32); +static const LLT V2S8 = LLT::vector(2, 8); +static const LLT V4S8 = LLT::vector(4, 8); + static const LLT V2S16 = LLT::vector(2, 16); +static const LLT V3S16 = LLT::vector(3, 16); static const LLT V4S16 = LLT::vector(4, 16); static const LLT V2S32 = LLT::vector(2, 32); @@ -27,11 +32,17 @@ static const LLT V4S32 = LLT::vector(4, 32); static const LLT V6S32 = LLT::vector(6, 32); +static const LLT V2S64 = LLT::vector(2, 64); +static const LLT V4S64 = LLT::vector(4, 64); + static const LLT V2P0 = LLT::vector(2, P0); static const LLT V3P0 = LLT::vector(3, P0); static const LLT V4P0 = LLT::vector(4, P0); static const LLT V6P0 = LLT::vector(6, P0); +static const LLT V2P1 = LLT::vector(2, P1); +static const LLT V4P1 = LLT::vector(4, P1); + TEST(GISelUtilsTest, getGCDType) { EXPECT_EQ(S1, getGCDType(S1, S1)); EXPECT_EQ(S32, getGCDType(S32, S32)); @@ -56,7 +67,7 @@ EXPECT_EQ(S32, getGCDType(P0, S32)); EXPECT_EQ(S32, getGCDType(S32, P0)); - EXPECT_EQ(S64, getGCDType(P0, S64)); + EXPECT_EQ(P0, getGCDType(P0, S64)); EXPECT_EQ(S64, getGCDType(S64, P0)); EXPECT_EQ(S32, getGCDType(P0, P1)); @@ -64,6 +75,76 @@ EXPECT_EQ(P0, getGCDType(V3P0, V2P0)); EXPECT_EQ(P0, getGCDType(V2P0, V3P0)); + + EXPECT_EQ(P0, getGCDType(P0, V2P0)); + EXPECT_EQ(P0, getGCDType(V2P0, P0)); + + + EXPECT_EQ(V2P0, getGCDType(V2P0, V2P0)); + EXPECT_EQ(P0, getGCDType(V3P0, V2P0)); + EXPECT_EQ(P0, getGCDType(V2P0, V3P0)); + EXPECT_EQ(V2P0, getGCDType(V4P0, V2P0)); + + EXPECT_EQ(V2P0, getGCDType(V2P0, V4P1)); + EXPECT_EQ(V4P1, getGCDType(V4P1, V2P0)); + + EXPECT_EQ(V2P0, getGCDType(V4P0, V4P1)); + EXPECT_EQ(V4P1, getGCDType(V4P1, V4P0)); + + // Elements have same size, but have different pointeriness, so prefer the + // original element type. + EXPECT_EQ(V2P0, getGCDType(V2P0, V4S64)); + EXPECT_EQ(V2S64, getGCDType(V4S64, V2P0)); + + EXPECT_EQ(V2S16, getGCDType(V2S16, V4P1)); + EXPECT_EQ(P1, getGCDType(V4P1, V2S16)); + EXPECT_EQ(V2P1, getGCDType(V4P1, V4S16)); + EXPECT_EQ(V4S16, getGCDType(V4S16, V2P1)); + + EXPECT_EQ(P0, getGCDType(P0, V2S64)); + EXPECT_EQ(S64, getGCDType(V2S64, P0)); + + EXPECT_EQ(S16, getGCDType(V2S16, V3S16)); + EXPECT_EQ(S16, getGCDType(V3S16, V2S16)); + EXPECT_EQ(S16, getGCDType(V3S16, S16)); + EXPECT_EQ(S16, getGCDType(S16, V3S16)); + + EXPECT_EQ(V2S16, getGCDType(V2S16, V2S32)); + EXPECT_EQ(S32, getGCDType(V2S32, V2S16)); + + EXPECT_EQ(V4S8, getGCDType(V4S8, V2S32)); + EXPECT_EQ(S32, getGCDType(V2S32, V4S8)); + + // Test cases where neither element type nicely divides. + EXPECT_EQ(LLT::scalar(3), getGCDType(LLT::vector(3, 5), LLT::vector(2, 6))); + EXPECT_EQ(LLT::scalar(3), getGCDType(LLT::vector(2, 6), LLT::vector(3, 5))); + + // Have to go smaller than a pointer element. + EXPECT_EQ(LLT::scalar(3), getGCDType(LLT::vector(2, LLT::pointer(3, 6)), + LLT::vector(3, 5))); + EXPECT_EQ(LLT::scalar(3), getGCDType(LLT::vector(3, 5), + LLT::vector(2, LLT::pointer(3, 6)))); + + EXPECT_EQ(V4S8, getGCDType(V4S8, S32)); + EXPECT_EQ(S32, getGCDType(S32, V4S8)); + EXPECT_EQ(V4S8, getGCDType(V4S8, P1)); + EXPECT_EQ(P1, getGCDType(P1, V4S8)); + + EXPECT_EQ(V2S8, getGCDType(V2S8, V4S16)); + EXPECT_EQ(S16, getGCDType(V4S16, V2S8)); + + EXPECT_EQ(S8, getGCDType(V2S8, LLT::vector(4, 2))); + EXPECT_EQ(LLT::vector(4, 2), getGCDType(LLT::vector(4, 2), S8)); + + + EXPECT_EQ(LLT::pointer(4, 8), getGCDType(LLT::vector(2, LLT::pointer(4, 8)), + LLT::vector(4, 2))); + + EXPECT_EQ(LLT::vector(4, 2), getGCDType(LLT::vector(4, 2), + LLT::vector(2, LLT::pointer(4, 8)))); + + EXPECT_EQ(LLT::scalar(4), getGCDType(LLT::vector(3, 4), S8)); + EXPECT_EQ(LLT::scalar(4), getGCDType(S8, LLT::vector(3, 4))); } TEST(GISelUtilsTest, getLCMType) {