diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp --- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -99,6 +99,11 @@ if (Subtarget.is64Bit()) { + getActionDefinitionsBuilder(G_BSWAP) + .legalFor({s32, s64}) + .widenScalarToNextPow2(0, /*Min=*/32) + .clampScalar(0, s32, s64); + if (Subtarget.hasPOPCNT()) { // popcount getActionDefinitionsBuilder(G_CTPOP) @@ -115,6 +120,14 @@ .clampScalar(1, s16, s64); } } else { // 32-bit + getActionDefinitionsBuilder(G_BSWAP) + .legalIf([=](const LegalityQuery &Query) { + // Workaround for legalFor: express the s32-only rule as a predicate. + return Query.Types[0] == s32; + }) + .widenScalarToNextPow2(0, /*Min=*/32) + .clampScalar(0, s32, s32); + if (Subtarget.hasPOPCNT()) { // popcount getActionDefinitionsBuilder(G_CTPOP) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir @@ -0,0 +1,76 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=X86-64 +# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=X86-32 + +# Test G_BSWAP legalization for s16, s32 and s64 + +... 
+---
+name: test_bswap_s64
+body: |
+  bb.1:
+    ; X86-64-LABEL: name: test_bswap_s64
+    ; X86-64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
+    ; X86-64-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]]
+    ; X86-64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[BSWAP]](s64)
+    ; X86-64-NEXT: RET 0, implicit [[COPY]](s64)
+    ; X86-32-LABEL: name: test_bswap_s64
+    ; X86-32: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
+    ; X86-32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+    ; X86-32-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+    ; X86-32-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+    ; X86-32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+    ; X86-32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; X86-32-NEXT: RET 0, implicit [[COPY]](s64)
+    %0:_(s64) = IMPLICIT_DEF
+    %1:_(s64) = G_BSWAP %0 ; s64: unchanged in the 64-bit run; the 32-bit run expects two s32 swaps merged with the halves exchanged
+    %2:_(s64) = COPY %1(s64)
+    RET 0, implicit %2
+
+...
+---
+name: test_bswap_s32
+body: |
+  bb.1:
+    ; X86-64-LABEL: name: test_bswap_s32
+    ; X86-64: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF
+    ; X86-64-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]]
+    ; X86-64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[BSWAP]](s32)
+    ; X86-64-NEXT: RET 0, implicit [[COPY]](s32)
+    ; X86-32-LABEL: name: test_bswap_s32
+    ; X86-32: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF
+    ; X86-32-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]]
+    ; X86-32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[BSWAP]](s32)
+    ; X86-32-NEXT: RET 0, implicit [[COPY]](s32)
+    %0:_(s32) = IMPLICIT_DEF
+    %1:_(s32) = G_BSWAP %0 ; s32: expected to stay unchanged in both runs
+    %2:_(s32) = COPY %1(s32)
+    RET 0, implicit %2
+
+...
+---
+name: test_bswap_s16
+body: |
+  bb.1:
+    ; X86-64-LABEL: name: test_bswap_s16
+    ; X86-64: [[DEF:%[0-9]+]]:_(s16) = IMPLICIT_DEF
+    ; X86-64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+    ; X86-64-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]]
+    ; X86-64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16
+    ; X86-64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8)
+    ; X86-64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; X86-64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+    ; X86-64-NEXT: RET 0, implicit [[COPY]](s16)
+    ; X86-32-LABEL: name: test_bswap_s16
+    ; X86-32: [[DEF:%[0-9]+]]:_(s16) = IMPLICIT_DEF
+    ; X86-32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+    ; X86-32-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]]
+    ; X86-32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16
+    ; X86-32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8)
+    ; X86-32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; X86-32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+    ; X86-32-NEXT: RET 0, implicit [[COPY]](s16)
+    %0:_(s16) = IMPLICIT_DEF
+    %1:_(s16) = G_BSWAP %0 ; s16: both runs expect an any-extend to s32, an s32 swap, a logical shift right by 16, then a truncate to s16
+    %2:_(s16) = COPY %1(s16)
+    RET 0, implicit %2