diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.h b/llvm/lib/Target/X86/X86LegalizerInfo.h --- a/llvm/lib/Target/X86/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/X86LegalizerInfo.h @@ -41,6 +41,7 @@ void setLegalizerInfoSSE1(); void setLegalizerInfoSSE2(); void setLegalizerInfoSSE41(); + void setLegalizerInfoSSE42(); void setLegalizerInfoAVX(); void setLegalizerInfoAVX2(); void setLegalizerInfoAVX512(); diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp --- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -65,6 +65,7 @@ setLegalizerInfoSSE1(); setLegalizerInfoSSE2(); setLegalizerInfoSSE41(); + setLegalizerInfoSSE42(); setLegalizerInfoAVX(); setLegalizerInfoAVX2(); setLegalizerInfoAVX512(); @@ -384,6 +385,27 @@ LegacyInfo.setAction({G_MUL, v4s32}, LegacyLegalizeActions::Legal); } +void X86LegalizerInfo::setLegalizerInfoSSE42() { + if (!Subtarget.hasSSE42()) + return; + + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // popcount + getActionDefinitionsBuilder(G_CTPOP) + .legalFor({{s16, s16}, {s32, s32}, {s64, s64}}) + .widenScalarToNextPow2(1, /*Min=*/16) + .clampScalar(1, s16, s64); + + // count leading zeros (LZCNT) + getActionDefinitionsBuilder(G_CTLZ) + .legalFor({{s16, s16}, {s32, s32}, {s64, s64}}) + .widenScalarToNextPow2(1, /*Min=*/16) + .clampScalar(1, s16, s64); +} + void X86LegalizerInfo::setLegalizerInfoAVX() { if (!Subtarget.hasAVX()) return; diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ctpop.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ctpop.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse4.2 -run-pass=legalizer %s -o - | FileCheck %s + +# test popcount for s16, s32, and s64 + +--- +name: test_ctpop35 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctpop35 + ; CHECK: [[DEF:%[0-9]+]]:_(s35) = IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s35) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[ZEXT]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[CTPOP]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s35) = COPY [[TRUNC]](s35) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s35) + %0:_(s35) = IMPLICIT_DEF + %1:_(s35) = G_CTPOP %0 + %2:_(s35) = COPY %1(s35) + RET 0, implicit %2 + +... +--- +name: test_ctpop8 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctpop8 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[DEF]](s8) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s16) = G_CTPOP [[ZEXT]](s16) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[CTPOP]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s8) + %0:_(s8) = IMPLICIT_DEF + %1:_(s8) = G_CTPOP %0 + %2:_(s8) = COPY %1(s8) + RET 0, implicit %2 + +... +--- +name: test_ctpop64 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctpop64 + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[DEF]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = G_CTPOP %0 + %2:_(s64) = COPY %1(s64) + RET 0, implicit %2 + +... +--- +name: test_ctpop32 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctpop32 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[DEF]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s32) + %0:_(s32) = IMPLICIT_DEF + %1:_(s32) = G_CTPOP %0 + %2:_(s32) = COPY %1(s32) + RET 0, implicit %2 + +... +--- +name: test_ctpop16 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctpop16 + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s16) = G_CTPOP [[DEF]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[CTPOP]](s16) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s16) + %0:_(s16) = IMPLICIT_DEF + %1:_(s16) = G_CTPOP %0 + %2:_(s16) = COPY %1(s16) + RET 0, implicit %2 + +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -0,0 +1,118 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse4.2 -run-pass=legalizer %s -o - | FileCheck %s + +# test count leading zeros for s16, s32, and s64 + +--- +name: test_ctlz35 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctlz35 + ; CHECK: [[DEF:%[0-9]+]]:_(s35) = IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s35) + ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[ZEXT]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 29 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[SUB]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s35) = COPY [[TRUNC]](s35) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s35) + %0:_(s35) = IMPLICIT_DEF + %1:_(s35) = G_CTLZ %0 + %2:_(s35) = COPY %1(s35) + RET 0, implicit %2 + +... +--- +name: test_ctlz8 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctlz8 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[DEF]](s8) + ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s16) = G_CTLZ [[ZEXT]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[CTLZ]], [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SUB]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s8) + %0:_(s8) = IMPLICIT_DEF + %1:_(s8) = G_CTLZ %0 + %2:_(s8) = COPY %1(s8) + RET 0, implicit %2 + +... +--- +name: test_ctlz64 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctlz64 + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s64) + %0:_(s64) = IMPLICIT_DEF + %1:_(s64) = G_CTLZ %0 + %2:_(s64) = COPY %1(s64) + RET 0, implicit %2 + +... +--- +name: test_ctlz32 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctlz32 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[DEF]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s32) + %0:_(s32) = IMPLICIT_DEF + %1:_(s32) = G_CTLZ %0 + %2:_(s32) = COPY %1(s32) + RET 0, implicit %2 + +... +--- +name: test_ctlz16 +alignment: 16 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } +body: | + bb.1: + ; CHECK-LABEL: name: test_ctlz16 + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = IMPLICIT_DEF + ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s16) = G_CTLZ [[DEF]](s16) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[CTLZ]](s16) + ; CHECK-NEXT: RET 0, implicit [[COPY]](s16) + %0:_(s16) = IMPLICIT_DEF + %1:_(s16) = G_CTLZ %0 + %2:_(s16) = COPY %1(s16) + RET 0, implicit %2 + +...