diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp --- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -60,6 +60,92 @@ const X86TargetMachine &TM) : Subtarget(STI), TM(TM) { + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + const LLT sMax = Subtarget.is64Bit() ? s64 : s32; + + const LLT v16s8 = LLT::fixed_vector(16, 8); + const LLT v8s16 = LLT::fixed_vector(8, 16); + const LLT v4s32 = LLT::fixed_vector(4, 32); + const LLT v2s64 = LLT::fixed_vector(2, 64); + + const LLT v32s8 = LLT::fixed_vector(32, 8); + const LLT v16s16 = LLT::fixed_vector(16, 16); + const LLT v8s32 = LLT::fixed_vector(8, 32); + const LLT v4s64 = LLT::fixed_vector(4, 64); + + const LLT v64s8 = LLT::fixed_vector(64, 8); + const LLT v32s16 = LLT::fixed_vector(32, 16); + const LLT v16s32 = LLT::fixed_vector(16, 32); + const LLT v8s64 = LLT::fixed_vector(8, 64); + + // integer addition/subtraction + auto &ActionAddSub = getActionDefinitionsBuilder({G_ADD, G_SUB}); + + ActionAddSub.legalFor({s8, s16, s32}); + + if (Subtarget.is64Bit()) + ActionAddSub.legalFor({s64}); + + if (Subtarget.hasSSE2()) + ActionAddSub.legalFor({v16s8, v8s16, v4s32, v2s64}) + .clampMinNumElements(0, s8, 16) + .clampMinNumElements(0, s16, 8) + .clampMinNumElements(0, s32, 4) + .clampMinNumElements(0, s64, 2); + + if (Subtarget.hasAVX2()) + ActionAddSub.legalFor({v32s8, v16s16, v8s32, v4s64}); + else if (Subtarget.hasSSE2()) + ActionAddSub.clampMaxNumElements(0, s8, 16) + .clampMaxNumElements(0, s16, 8) + .clampMaxNumElements(0, s32, 4) + .clampMaxNumElements(0, s64, 2); + + if (Subtarget.hasAVX512()) + ActionAddSub.legalFor({v16s32, v8s64}) + .clampMaxNumElements(0, s32, 16) + .clampMaxNumElements(0, s64, 8); + else if (Subtarget.hasAVX2()) + ActionAddSub.clampMaxNumElements(0, s32, 8).clampMaxNumElements(0, s64, 4); + + if (Subtarget.hasAVX512() && Subtarget.hasBWI()) + ActionAddSub.legalFor({v64s8, v32s16}) + .clampMaxNumElements(0, s8, 64) + .clampMaxNumElements(0, s16, 32); + else if (Subtarget.hasAVX2()) + ActionAddSub.clampMaxNumElements(0, s8, 32).clampMaxNumElements(0, s16, 16); + + ActionAddSub.widenScalarToNextPow2(0, /*Min=*/32) + .clampScalar(0, s8, sMax) + .scalarize(0); + + // popcount + if (Subtarget.hasPOPCNT()) { + auto &ActionPOPCNT = getActionDefinitionsBuilder(G_CTPOP); + + ActionPOPCNT.legalFor({{s16, s16}, {s32, s32}}); + + if (Subtarget.is64Bit()) + ActionPOPCNT.legalFor({{s64, s64}}); + + ActionPOPCNT.widenScalarToNextPow2(1, /*Min=*/16).clampScalar(1, s16, sMax); + } + + // count leading zeros (LZCNT) + if (Subtarget.hasLZCNT()) { + auto &ActionLZCNT = getActionDefinitionsBuilder(G_CTLZ); + + ActionLZCNT.legalFor({{s16, s16}, {s32, s32}}); + + if (Subtarget.is64Bit()) + ActionLZCNT.legalFor({{s64, s64}}); + + ActionLZCNT.widenScalarToNextPow2(1, /*Min=*/16).clampScalar(1, s16, sMax); + } + setLegalizerInfo32bit(); setLegalizerInfo64bit(); setLegalizerInfoSSE1(); @@ -72,10 +158,6 @@ setLegalizerInfoAVX512DQ(); setLegalizerInfoAVX512BW(); - const LLT s16 = LLT::scalar(16); - const LLT s32 = LLT::scalar(32); - const LLT s64 = LLT::scalar(64); - getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) .scalarize(0) .minScalar(0, LLT::scalar(32)) @@ -83,7 +165,7 @@ auto &LegacyInfo = getLegacyLegalizerInfo(); LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1); - for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + for (unsigned BinOp : {G_MUL, G_AND, G_OR, G_XOR}) LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1); for (unsigned MemOp : {G_LOAD, G_STORE}) LegacyInfo.setLegalizeScalarToDifferentSizeStrategy( @@ -97,41 +179,6 @@ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - - if (Subtarget.is64Bit()) { - if (Subtarget.hasPOPCNT()) { - // popcount - getActionDefinitionsBuilder(G_CTPOP) - .legalFor({{s16, s16}, {s32, s32}, {s64, s64}}) - .widenScalarToNextPow2(1, /*Min=*/16) - .clampScalar(1, s16, s64); - } - - if (Subtarget.hasLZCNT()) { - // count leading zeros (LZCNT) - getActionDefinitionsBuilder(G_CTLZ) - .legalFor({{s16, s16}, {s32, s32}, {s64, s64}}) - .widenScalarToNextPow2(1, /*Min=*/16) - .clampScalar(1, s16, s64); - } - } else { // 32-bit - if (Subtarget.hasPOPCNT()) { - // popcount - getActionDefinitionsBuilder(G_CTPOP) - .legalFor({{s16, s16}, {s32, s32}}) - .widenScalarToNextPow2(1, /*Min=*/16) - .clampScalar(1, s16, s32); - } - - if (Subtarget.hasLZCNT()) { - // count leading zeros (LZCNT) - getActionDefinitionsBuilder(G_CTLZ) - .legalFor({{s16, s16}, {s32, s32}}) - .widenScalarToNextPow2(1, /*Min=*/16) - .clampScalar(1, s16, s32); - } - } - LegacyInfo.computeTables(); verify(*STI.getInstrInfo()); } @@ -159,7 +206,7 @@ for (auto Ty : {s8, s16, s32, p0}) LegacyInfo.setAction({G_PHI, Ty}, LegacyLegalizeActions::Legal); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + for (unsigned BinOp : {G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32}) LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); @@ -259,7 +306,7 @@ LegacyInfo.setAction({G_PHI, s64}, LegacyLegalizeActions::Legal); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + for (unsigned BinOp : {G_MUL, G_AND, G_OR, G_XOR}) LegacyInfo.setAction({BinOp, s64}, LegacyLegalizeActions::Legal); for (unsigned MemOp : {G_LOAD, G_STORE}) @@ -384,10 +431,6 @@ for (auto Ty : {s64, v2s64}) LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); - for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) - LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_MUL, v8s16}, LegacyLegalizeActions::Legal); LegacyInfo.setAction({G_FPEXT, s64}, LegacyLegalizeActions::Legal); @@ -494,10 +537,6 @@ auto &LegacyInfo = getLegacyLegalizerInfo(); - for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) - LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); - for (auto Ty : {v16s16, v8s32}) LegacyInfo.setAction({G_MUL, Ty}, LegacyLegalizeActions::Legal); @@ -538,9 +577,6 @@ auto &LegacyInfo = getLegacyLegalizerInfo(); - for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v16s32, v8s64}) - LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); LegacyInfo.setAction({G_MUL, v16s32}, LegacyLegalizeActions::Legal); @@ -595,10 +631,6 @@ auto &LegacyInfo = getLegacyLegalizerInfo(); - for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v64s8, v32s16}) - LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal); - LegacyInfo.setAction({G_MUL, v32s16}, LegacyLegalizeActions::Legal); /************ VLX *******************/