diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28977,18 +28977,28 @@ SDValue In = Op.getOperand(0); SDLoc DL(Op); + assert(VT.getScalarType() == MVT::i8 && + "Only byte vector BITREVERSE supported"); + // Split v64i8 without BWI so that we can still use the PSHUFB lowering. if (VT == MVT::v64i8 && !Subtarget.hasBWI()) return splitVectorIntUnary(Op, DAG); - unsigned NumElts = VT.getVectorNumElements(); - assert(VT.getScalarType() == MVT::i8 && - "Only byte vector BITREVERSE supported"); - // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2. - if (VT.is256BitVector() && !Subtarget.hasInt256()) + if (VT == MVT::v32i8 && !Subtarget.hasInt256()) return splitVectorIntUnary(Op, DAG); + unsigned NumElts = VT.getVectorNumElements(); + + // If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits. + if (Subtarget.hasGFNI()) { + MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8); + SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT); + Matrix = DAG.getBitcast(VT, Matrix); + return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix, + DAG.getTargetConstant(0, DL, MVT::i8)); + } + // Perform BITREVERSE using PSHUFB lookups. Each byte is split into // two nibbles and a PSHUFB lookup to find the bitreverse of each // 0-15 value (moved to the other nibble). diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2783,6 +2783,22 @@ MVT MTy = LT.second; // Attempt to lookup cost. + if (ISD == ISD::BITREVERSE && ST->hasGFNI() && ST->hasSSSE3() && + MTy.isVector()) { + // With PSHUFB the code is very similar for all types. If we have integer + // byte operations, we just need a GF2P8AFFINEQB for vXi8. For other types + // we also need a PSHUFB. + unsigned Cost = MTy.getVectorElementType() == MVT::i8 ? 1 : 2; + + // Without byte operations, we need twice as many GF2P8AFFINEQB and PSHUFB + // instructions. We also need an extract and an insert. + if (!(MTy.is128BitVector() || (ST->hasAVX2() && MTy.is256BitVector()) || + (ST->hasBWI() && MTy.is512BitVector()))) + Cost = Cost * 2 + 2; + + return LT.first * Cost; + } + if (ST->useGLMDivSqrtCosts()) if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy)) return LT.first * Entry->Cost; diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse.ll b/llvm/test/Analysis/CostModel/X86/bitreverse.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse.ll @@ -13,6 +13,11 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=X64,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop | FileCheck %s -check-prefixes=XOP ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+gfni,+ssse3 | FileCheck %s -check-prefixes=GFNISSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+gfni,+avx | FileCheck %s -check-prefixes=GFNIAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+gfni,+avx2 | FileCheck %s -check-prefixes=GFNIAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+gfni,+avx512f | FileCheck %s -check-prefixes=GFNIAVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+gfni,+avx512bw | FileCheck %s -check-prefixes=GFNIAVX512BW ; Verify the cost of scalar bitreverse instructions. @@ -33,6 +38,26 @@ ; XOP-LABEL: 'var_bitreverse_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_i64' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_i64' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_i64' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_i64' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_i64' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bitreverse ; %bitreverse = call i64 @llvm.bitreverse.i64(i64 %a) ret i64 %bitreverse @@ -50,6 +75,26 @@ ; XOP-LABEL: 'var_bitreverse_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_i32' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_i32' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_i32' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_i32' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_i32' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bitreverse ; %bitreverse = call i32 @llvm.bitreverse.i32(i32 %a) ret i32 %bitreverse @@ -67,6 +112,26 @@ ; XOP-LABEL: 'var_bitreverse_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_i16' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_i16' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_i16' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_i16' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_i16' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bitreverse ; %bitreverse = call i16 @llvm.bitreverse.i16(i16 %a) ret i16 %bitreverse @@ -84,6 +149,26 @@ ; XOP-LABEL: 'var_bitreverse_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_i8' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_i8' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_i8' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_i8' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_i8' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %bitreverse ; %bitreverse = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %bitreverse @@ -126,6 +211,26 @@ ; XOP-LABEL: 'var_bitreverse_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v2i64' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v2i64' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v2i64' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v2i64' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v2i64' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bitreverse ; %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %bitreverse @@ -155,6 +260,26 @@ ; XOP-LABEL: 'var_bitreverse_v4i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v4i64' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v4i64' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v4i64' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v4i64' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v4i64' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bitreverse ; %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ret <4 x i64> %bitreverse @@ -188,6 +313,26 @@ ; XOP-LABEL: 'var_bitreverse_v8i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v8i64' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v8i64' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v8i64' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v8i64' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v8i64' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bitreverse ; %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ret <8 x i64> %bitreverse @@ -213,6 +358,26 @@ ; XOP-LABEL: 'var_bitreverse_v4i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v4i32' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v4i32' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v4i32' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v4i32' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v4i32' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bitreverse ; %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %bitreverse @@ -242,6 +407,26 @@ ; XOP-LABEL: 'var_bitreverse_v8i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v8i32' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v8i32' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v8i32' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v8i32' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v8i32' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bitreverse ; %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ret <8 x i32> %bitreverse @@ -275,6 +460,26 @@ ; XOP-LABEL: 'var_bitreverse_v16i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v16i32' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v16i32' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v16i32' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v16i32' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v16i32' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bitreverse ; %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ret <16 x i32> %bitreverse @@ -300,6 +505,26 @@ ; XOP-LABEL: 'var_bitreverse_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v8i16' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v8i16' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v8i16' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v8i16' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v8i16' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bitreverse ; %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %bitreverse @@ -329,6 +554,26 @@ ; XOP-LABEL: 'var_bitreverse_v16i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v16i16' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v16i16' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v16i16' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v16i16' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v16i16' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bitreverse ; %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ret <16 x i16> %bitreverse @@ -362,6 +607,26 @@ ; XOP-LABEL: 'var_bitreverse_v32i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v32i16' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v32i16' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v32i16' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v32i16' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v32i16' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bitreverse ; %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ret <32 x i16> %bitreverse @@ -387,6 +652,26 @@ ; XOP-LABEL: 'var_bitreverse_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v16i8' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v16i8' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v16i8' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v16i8' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v16i8' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bitreverse ; %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %bitreverse @@ -416,6 +701,26 @@ ; XOP-LABEL: 'var_bitreverse_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v32i8' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v32i8' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v32i8' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v32i8' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v32i8' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bitreverse ; %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) ret <32 x i8> %bitreverse @@ -449,6 +754,26 @@ ; XOP-LABEL: 'var_bitreverse_v64i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse +; +; GFNISSE-LABEL: 'var_bitreverse_v64i8' +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) +; GFNISSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse +; +; GFNIAVX-LABEL: 'var_bitreverse_v64i8' +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) +; GFNIAVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse +; +; GFNIAVX2-LABEL: 'var_bitreverse_v64i8' +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) +; GFNIAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse +; +; GFNIAVX512F-LABEL: 'var_bitreverse_v64i8' +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) +; GFNIAVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse +; +; GFNIAVX512BW-LABEL: 'var_bitreverse_v64i8' +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) +; GFNIAVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bitreverse ; %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) ret <64 x i8> %bitreverse diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -7,6 +7,11 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+gfni | FileCheck %s --check-prefix=ALL --check-prefix=GFNISSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefix=ALL --check-prefix=GFNIAVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefix=ALL --check-prefix=GFNIAVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+gfni | FileCheck %s --check-prefix=ALL --check-prefix=GFNIAVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+gfni | FileCheck %s --check-prefix=ALL --check-prefix=GFNIAVX512BW ; Make sure we don't crash with avx512bw and xop ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx512bw @@ -57,6 +62,101 @@ ; XOP-NEXT: vmovd %xmm0, %eax ; XOP-NEXT: # kill: def $al killed $al killed $eax ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_i8: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi +; GFNISSE-NEXT: rolb $4, %dil +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andb $51, %al +; GFNISSE-NEXT: shlb $2, %al +; GFNISSE-NEXT: andb $-52, %dil +; GFNISSE-NEXT: shrb $2, %dil +; GFNISSE-NEXT: orb %al, %dil +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andb $85, %al +; GFNISSE-NEXT: addb %al, %al +; GFNISSE-NEXT: andb $-86, %dil +; GFNISSE-NEXT: shrb %dil +; GFNISSE-NEXT: addl %edi, %eax +; GFNISSE-NEXT: # kill: def $al killed $al killed $eax +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_i8: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX-NEXT: rolb $4, %dil +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andb $51, %al +; GFNIAVX-NEXT: shlb $2, %al +; GFNIAVX-NEXT: andb $-52, %dil +; GFNIAVX-NEXT: shrb $2, %dil +; GFNIAVX-NEXT: orb %al, %dil +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andb $85, %al +; GFNIAVX-NEXT: addb %al, %al +; GFNIAVX-NEXT: andb $-86, %dil +; GFNIAVX-NEXT: shrb %dil +; GFNIAVX-NEXT: addl %edi, %eax +; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX2-NEXT: rolb $4, %dil +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andb $51, %al +; GFNIAVX2-NEXT: shlb $2, %al +; GFNIAVX2-NEXT: andb $-52, %dil +; GFNIAVX2-NEXT: shrb $2, %dil +; GFNIAVX2-NEXT: orb %al, %dil +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andb $85, %al +; GFNIAVX2-NEXT: addb %al, %al +; GFNIAVX2-NEXT: andb $-86, %dil +; GFNIAVX2-NEXT: shrb %dil +; GFNIAVX2-NEXT: addl %edi, %eax +; GFNIAVX2-NEXT: # kill: def $al killed $al killed $eax +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_i8: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512F-NEXT: rolb $4, %dil +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andb $51, %al +; GFNIAVX512F-NEXT: shlb $2, %al +; GFNIAVX512F-NEXT: andb $-52, %dil +; GFNIAVX512F-NEXT: shrb $2, %dil +; GFNIAVX512F-NEXT: orb %al, %dil +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andb $85, %al +; GFNIAVX512F-NEXT: addb %al, %al +; GFNIAVX512F-NEXT: andb $-86, %dil +; GFNIAVX512F-NEXT: shrb %dil +; GFNIAVX512F-NEXT: addl %edi, %eax +; GFNIAVX512F-NEXT: # kill: def $al killed $al killed $eax +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_i8: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512BW-NEXT: rolb $4, %dil +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andb $51, %al +; GFNIAVX512BW-NEXT: shlb $2, %al +; GFNIAVX512BW-NEXT: andb $-52, %dil +; GFNIAVX512BW-NEXT: shrb $2, %dil +; GFNIAVX512BW-NEXT: orb %al, %dil +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andb $85, %al +; GFNIAVX512BW-NEXT: addb %al, %al +; GFNIAVX512BW-NEXT: andb $-86, %dil +; GFNIAVX512BW-NEXT: shrb %dil +; GFNIAVX512BW-NEXT: addl %edi, %eax +; GFNIAVX512BW-NEXT: # kill: def $al killed $al killed $eax +; GFNIAVX512BW-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b } @@ -115,6 +215,121 @@ ; XOP-NEXT: vmovd %xmm0, %eax ; XOP-NEXT: # kill: def $ax killed $ax killed $eax ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_i16: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi +; GFNISSE-NEXT: rolw $8, %di +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andl $3855, %eax # imm = 0xF0F +; GFNISSE-NEXT: shll $4, %eax +; GFNISSE-NEXT: andl $61680, %edi # imm = 0xF0F0 +; GFNISSE-NEXT: shrl $4, %edi +; GFNISSE-NEXT: orl %eax, %edi +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andl $13107, %eax # imm = 0x3333 +; GFNISSE-NEXT: andl $52428, %edi # imm = 0xCCCC +; GFNISSE-NEXT: shrl $2, %edi +; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax +; GFNISSE-NEXT: movl %eax, %ecx +; GFNISSE-NEXT: andl $21845, %ecx # imm = 0x5555 +; GFNISSE-NEXT: andl $43690, %eax # imm = 0xAAAA +; GFNISSE-NEXT: shrl %eax +; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax +; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_i16: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX-NEXT: rolw $8, %di +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andl $3855, %eax # imm = 0xF0F +; GFNIAVX-NEXT: shll $4, %eax +; GFNIAVX-NEXT: andl $61680, %edi # imm = 0xF0F0 +; GFNIAVX-NEXT: shrl $4, %edi +; GFNIAVX-NEXT: orl %eax, %edi +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andl $13107, %eax # imm = 0x3333 +; GFNIAVX-NEXT: andl $52428, %edi # imm = 0xCCCC +; GFNIAVX-NEXT: shrl $2, %edi +; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX-NEXT: movl %eax, %ecx +; GFNIAVX-NEXT: andl $21845, %ecx # imm = 0x5555 +; GFNIAVX-NEXT: andl $43690, %eax # imm = 0xAAAA +; GFNIAVX-NEXT: shrl %eax +; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX2-NEXT: rolw $8, %di +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andl $3855, %eax # imm = 0xF0F +; GFNIAVX2-NEXT: shll $4, %eax +; GFNIAVX2-NEXT: andl $61680, %edi # imm = 0xF0F0 +; GFNIAVX2-NEXT: shrl $4, %edi +; GFNIAVX2-NEXT: orl %eax, %edi +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andl $13107, %eax # imm = 0x3333 +; GFNIAVX2-NEXT: andl $52428, %edi # imm = 0xCCCC +; GFNIAVX2-NEXT: shrl $2, %edi +; GFNIAVX2-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX2-NEXT: movl %eax, %ecx +; GFNIAVX2-NEXT: andl $21845, %ecx # imm = 0x5555 +; GFNIAVX2-NEXT: andl $43690, %eax # imm = 0xAAAA +; GFNIAVX2-NEXT: shrl %eax +; GFNIAVX2-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX2-NEXT: # kill: def $ax killed $ax killed $eax +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_i16: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512F-NEXT: rolw $8, %di +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andl $3855, %eax # imm = 0xF0F +; GFNIAVX512F-NEXT: shll $4, %eax +; GFNIAVX512F-NEXT: andl $61680, %edi # imm = 0xF0F0 +; GFNIAVX512F-NEXT: shrl $4, %edi +; GFNIAVX512F-NEXT: orl %eax, %edi +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andl $13107, %eax # imm = 0x3333 +; GFNIAVX512F-NEXT: andl $52428, %edi # imm = 0xCCCC +; GFNIAVX512F-NEXT: shrl $2, %edi +; GFNIAVX512F-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX512F-NEXT: movl %eax, %ecx +; GFNIAVX512F-NEXT: andl $21845, %ecx # imm = 0x5555 +; GFNIAVX512F-NEXT: andl $43690, %eax # imm = 0xAAAA +; GFNIAVX512F-NEXT: shrl %eax +; GFNIAVX512F-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX512F-NEXT: # kill: def $ax killed $ax killed $eax +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_i16: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512BW-NEXT: rolw $8, %di +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andl $3855, %eax # imm = 0xF0F +; GFNIAVX512BW-NEXT: shll $4, %eax +; GFNIAVX512BW-NEXT: andl $61680, %edi # imm = 0xF0F0 +; GFNIAVX512BW-NEXT: shrl $4, %edi +; GFNIAVX512BW-NEXT: orl %eax, %edi +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andl $13107, %eax # imm = 0x3333 +; GFNIAVX512BW-NEXT: andl $52428, %edi # imm = 0xCCCC +; GFNIAVX512BW-NEXT: shrl $2, %edi +; GFNIAVX512BW-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX512BW-NEXT: movl %eax, %ecx +; GFNIAVX512BW-NEXT: andl $21845, %ecx # imm = 0x5555 +; GFNIAVX512BW-NEXT: andl $43690, %eax # imm = 0xAAAA +; GFNIAVX512BW-NEXT: shrl %eax +; GFNIAVX512BW-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX512BW-NEXT: # kill: def $ax killed $ax killed $eax +; GFNIAVX512BW-NEXT: retq %b = call i16 @llvm.bitreverse.i16(i16 %a) ret i16 %b } @@ -170,6 +385,116 @@ ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vmovd %xmm0, %eax ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_i32: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi +; GFNISSE-NEXT: bswapl %edi +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; GFNISSE-NEXT: shll $4, %eax +; GFNISSE-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; GFNISSE-NEXT: shrl $4, %edi +; GFNISSE-NEXT: orl %eax, %edi +; GFNISSE-NEXT: movl %edi, %eax +; GFNISSE-NEXT: andl $858993459, %eax # imm = 0x33333333 +; GFNISSE-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; GFNISSE-NEXT: shrl $2, %edi +; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax +; GFNISSE-NEXT: movl %eax, %ecx +; GFNISSE-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; GFNISSE-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; GFNISSE-NEXT: shrl %eax +; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_i32: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX-NEXT: bswapl %edi +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; GFNIAVX-NEXT: shll $4, %eax +; GFNIAVX-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; GFNIAVX-NEXT: shrl $4, %edi +; GFNIAVX-NEXT: orl %eax, %edi +; GFNIAVX-NEXT: movl %edi, %eax +; GFNIAVX-NEXT: andl $858993459, %eax # imm = 0x33333333 +; GFNIAVX-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; GFNIAVX-NEXT: shrl $2, %edi +; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX-NEXT: movl %eax, %ecx +; GFNIAVX-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; GFNIAVX-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; GFNIAVX-NEXT: shrl %eax +; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX2-NEXT: bswapl %edi +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; GFNIAVX2-NEXT: shll $4, %eax +; GFNIAVX2-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; GFNIAVX2-NEXT: shrl $4, %edi +; GFNIAVX2-NEXT: orl %eax, %edi +; GFNIAVX2-NEXT: movl %edi, %eax +; GFNIAVX2-NEXT: andl $858993459, %eax # imm = 0x33333333 +; GFNIAVX2-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; GFNIAVX2-NEXT: shrl $2, %edi +; GFNIAVX2-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX2-NEXT: movl %eax, %ecx +; GFNIAVX2-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; GFNIAVX2-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; GFNIAVX2-NEXT: shrl %eax +; GFNIAVX2-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_i32: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512F-NEXT: bswapl %edi +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; GFNIAVX512F-NEXT: shll $4, %eax +; GFNIAVX512F-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; GFNIAVX512F-NEXT: shrl $4, %edi +; GFNIAVX512F-NEXT: orl %eax, %edi +; GFNIAVX512F-NEXT: movl %edi, %eax +; GFNIAVX512F-NEXT: andl $858993459, %eax # imm = 0x33333333 +; GFNIAVX512F-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; GFNIAVX512F-NEXT: shrl $2, %edi +; GFNIAVX512F-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX512F-NEXT: movl %eax, %ecx +; GFNIAVX512F-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; GFNIAVX512F-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; GFNIAVX512F-NEXT: shrl %eax +; GFNIAVX512F-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_i32: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: # kill: def $edi killed $edi def $rdi +; GFNIAVX512BW-NEXT: bswapl %edi +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; GFNIAVX512BW-NEXT: shll $4, %eax +; GFNIAVX512BW-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; GFNIAVX512BW-NEXT: shrl $4, %edi +; GFNIAVX512BW-NEXT: orl %eax, %edi +; GFNIAVX512BW-NEXT: movl %edi, %eax +; GFNIAVX512BW-NEXT: andl $858993459, %eax # imm = 0x33333333 +; GFNIAVX512BW-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; GFNIAVX512BW-NEXT: shrl $2, %edi +; GFNIAVX512BW-NEXT: leal (%rdi,%rax,4), %eax +; GFNIAVX512BW-NEXT: movl %eax, %ecx +; GFNIAVX512BW-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; GFNIAVX512BW-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; GFNIAVX512BW-NEXT: shrl %eax +; GFNIAVX512BW-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX512BW-NEXT: retq %b = call i32 @llvm.bitreverse.i32(i32 %a) ret i32 %b } @@ -229,6 +554,126 @@ ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vmovq %xmm0, %rax ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_i64: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: bswapq %rdi +; GFNISSE-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; GFNISSE-NEXT: andq %rdi, %rax +; GFNISSE-NEXT: shlq $4, %rax +; GFNISSE-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; GFNISSE-NEXT: andq %rdi, %rcx +; GFNISSE-NEXT: shrq $4, %rcx +; GFNISSE-NEXT: orq %rax, %rcx +; GFNISSE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; GFNISSE-NEXT: andq %rcx, %rax +; GFNISSE-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; GFNISSE-NEXT: andq %rcx, %rdx +; GFNISSE-NEXT: shrq $2, %rdx +; GFNISSE-NEXT: leaq (%rdx,%rax,4), %rax +; GFNISSE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; GFNISSE-NEXT: andq %rax, %rcx +; GFNISSE-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; GFNISSE-NEXT: andq %rax, %rdx +; GFNISSE-NEXT: shrq %rdx +; GFNISSE-NEXT: leaq (%rdx,%rcx,2), %rax +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_i64: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: bswapq %rdi +; GFNIAVX-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; GFNIAVX-NEXT: andq %rdi, %rax +; GFNIAVX-NEXT: shlq $4, %rax +; GFNIAVX-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; GFNIAVX-NEXT: andq %rdi, %rcx +; GFNIAVX-NEXT: shrq $4, %rcx +; GFNIAVX-NEXT: orq %rax, %rcx +; GFNIAVX-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; GFNIAVX-NEXT: andq %rcx, %rax +; GFNIAVX-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; GFNIAVX-NEXT: andq %rcx, %rdx +; GFNIAVX-NEXT: shrq $2, %rdx +; GFNIAVX-NEXT: leaq (%rdx,%rax,4), %rax +; GFNIAVX-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; GFNIAVX-NEXT: andq %rax, %rcx +; GFNIAVX-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; GFNIAVX-NEXT: andq %rax, %rdx +; GFNIAVX-NEXT: shrq %rdx +; GFNIAVX-NEXT: leaq (%rdx,%rcx,2), %rax +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: bswapq %rdi +; GFNIAVX2-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; GFNIAVX2-NEXT: andq %rdi, %rax +; GFNIAVX2-NEXT: shlq $4, %rax +; GFNIAVX2-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; GFNIAVX2-NEXT: andq %rdi, %rcx +; GFNIAVX2-NEXT: shrq $4, %rcx +; GFNIAVX2-NEXT: orq %rax, %rcx +; GFNIAVX2-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; GFNIAVX2-NEXT: andq %rcx, %rax +; GFNIAVX2-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; GFNIAVX2-NEXT: andq %rcx, %rdx +; GFNIAVX2-NEXT: shrq $2, %rdx +; GFNIAVX2-NEXT: leaq (%rdx,%rax,4), %rax +; GFNIAVX2-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; GFNIAVX2-NEXT: andq %rax, %rcx +; GFNIAVX2-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; GFNIAVX2-NEXT: andq %rax, %rdx +; GFNIAVX2-NEXT: shrq %rdx +; GFNIAVX2-NEXT: leaq (%rdx,%rcx,2), %rax +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_i64: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: bswapq %rdi +; GFNIAVX512F-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; GFNIAVX512F-NEXT: andq %rdi, %rax +; GFNIAVX512F-NEXT: shlq $4, %rax +; GFNIAVX512F-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; GFNIAVX512F-NEXT: andq %rdi, %rcx +; GFNIAVX512F-NEXT: shrq $4, %rcx +; GFNIAVX512F-NEXT: orq %rax, %rcx +; GFNIAVX512F-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; GFNIAVX512F-NEXT: andq %rcx, %rax +; GFNIAVX512F-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; GFNIAVX512F-NEXT: andq %rcx, %rdx +; GFNIAVX512F-NEXT: shrq $2, %rdx +; GFNIAVX512F-NEXT: leaq (%rdx,%rax,4), %rax +; GFNIAVX512F-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; GFNIAVX512F-NEXT: andq %rax, %rcx +; GFNIAVX512F-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; GFNIAVX512F-NEXT: andq %rax, %rdx +; GFNIAVX512F-NEXT: shrq %rdx +; GFNIAVX512F-NEXT: leaq (%rdx,%rcx,2), %rax +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_i64: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: bswapq %rdi +; GFNIAVX512BW-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; GFNIAVX512BW-NEXT: andq %rdi, %rax +; GFNIAVX512BW-NEXT: shlq $4, %rax +; GFNIAVX512BW-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0 +; GFNIAVX512BW-NEXT: andq %rdi, %rcx +; GFNIAVX512BW-NEXT: shrq $4, %rcx +; GFNIAVX512BW-NEXT: orq %rax, %rcx +; GFNIAVX512BW-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; GFNIAVX512BW-NEXT: andq %rcx, %rax +; GFNIAVX512BW-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC +; GFNIAVX512BW-NEXT: andq %rcx, %rdx +; GFNIAVX512BW-NEXT: shrq $2, %rdx +; GFNIAVX512BW-NEXT: leaq (%rdx,%rax,4), %rax +; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; GFNIAVX512BW-NEXT: andq %rax, %rcx +; GFNIAVX512BW-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; GFNIAVX512BW-NEXT: andq %rax, %rdx +; GFNIAVX512BW-NEXT: shrq %rdx +; GFNIAVX512BW-NEXT: leaq (%rdx,%rcx,2), %rax +; GFNIAVX512BW-NEXT: retq %b = call i64 @llvm.bitreverse.i64(i64 %a) ret i64 %b } @@ -288,6 +733,31 @@ ; XOP: # %bb.0: ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v16i8: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: gf2p8affineqb $0, {{.*}}(%rip), %xmm0 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v16i8: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v16i8: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v16i8: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512BW-NEXT: retq %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %b } @@ -353,6 +823,36 @@ ; XOP: # %bb.0: ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v8i16: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNISSE-NEXT: gf2p8affineqb $0, {{.*}}(%rip), %xmm0 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v8i16: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v8i16: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v8i16: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512BW-NEXT: retq %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %b } @@ -423,6 +923,36 @@ ; XOP: # %bb.0: ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v4i32: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNISSE-NEXT: gf2p8affineqb $0, {{.*}}(%rip), %xmm0 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v4i32: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v4i32: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v4i32: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512BW-NEXT: retq %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %b } @@ -495,6 +1025,36 @@ ; XOP: # %bb.0: ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; XOP-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v2i64: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNISSE-NEXT: gf2p8affineqb $0, {{.*}}(%rip), %xmm0 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v2i64: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v2i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v2i64: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v2i64: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip), %xmm0, %xmm0 +; GFNIAVX512BW-NEXT: retq %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %b } @@ -634,6 +1194,40 @@ ; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v32i8: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm2, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm2, %xmm1 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v32i8: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm2, %xmm1, %xmm1 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm2, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v32i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v32i8: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v32i8: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: retq %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) ret <32 x i8> %b } @@ -789,6 +1383,49 @@ ; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v16i16: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNISSE-NEXT: pshufb %xmm2, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0 +; GFNISSE-NEXT: pshufb %xmm2, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v16i16: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v16i16: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v16i16: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: retq %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ret <16 x i16> %b } @@ -953,6 +1590,49 @@ ; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v8i32: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNISSE-NEXT: pshufb %xmm2, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0 +; GFNISSE-NEXT: pshufb %xmm2, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v8i32: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v8i32: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v8i32: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: retq %b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ret <8 x i32> %b } @@ -1121,6 +1801,49 @@ ; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v4i64: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNISSE-NEXT: pshufb %xmm2, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0 +; GFNISSE-NEXT: pshufb %xmm2, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v4i64: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v4i64: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v4i64: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: retq %b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ret <4 x i64> %b } @@ -1366,6 +2089,49 @@ ; XOPAVX2-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm1 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v64i8: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm2 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm3 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v64i8: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; GFNIAVX-NEXT: vextractf128 $1, %ymm1, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v64i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v64i8: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v64i8: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: retq %b = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) ret <64 x i8> %b } @@ -1645,6 +2411,66 @@ ; XOPAVX2-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm1 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v32i16: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNISSE-NEXT: pshufb %xmm4, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0 +; GFNISSE-NEXT: pshufb %xmm4, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1 +; GFNISSE-NEXT: pshufb %xmm4, %xmm2 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2 +; GFNISSE-NEXT: pshufb %xmm4, %xmm3 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v32i16: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; GFNIAVX-NEXT: vextractf128 $1, %ymm1, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v32i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v32i16: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v32i16: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30,33,32,35,34,37,36,39,38,41,40,43,42,45,44,47,46,49,48,51,50,53,52,55,54,57,56,59,58,61,60,63,62] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: retq %b = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ret <32 x i16> %b } @@ -1941,6 +2767,66 @@ ; XOPAVX2-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm1 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v16i32: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNISSE-NEXT: pshufb %xmm4, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0 +; GFNISSE-NEXT: pshufb %xmm4, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1 +; GFNISSE-NEXT: pshufb %xmm4, %xmm2 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2 +; GFNISSE-NEXT: pshufb %xmm4, %xmm3 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v16i32: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; GFNIAVX-NEXT: vextractf128 $1, %ymm1, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v16i32: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v16i32: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28,35,34,33,32,39,38,37,36,43,42,41,40,47,46,45,44,51,50,49,48,55,54,53,52,59,58,57,56,63,62,61,60] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: retq %b = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ret <16 x i32> %b } @@ -2245,6 +3131,66 @@ ; XOPAVX2-NEXT: vpperm %xmm3, %xmm1, %xmm0, %xmm1 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: retq +; +; GFNISSE-LABEL: test_bitreverse_v8i64: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNISSE-NEXT: pshufb %xmm4, %xmm0 +; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [9241421688590303745,9241421688590303745] +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0 +; GFNISSE-NEXT: pshufb %xmm4, %xmm1 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1 +; GFNISSE-NEXT: pshufb %xmm4, %xmm2 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2 +; GFNISSE-NEXT: pshufb %xmm4, %xmm3 +; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3 +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: test_bitreverse_v8i64: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vextractf128 $1, %ymm0, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm3 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745] +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; GFNIAVX-NEXT: vextractf128 $1, %ymm1, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2 +; GFNIAVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; GFNIAVX-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1 +; GFNIAVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: test_bitreverse_v8i64: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1 +; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0 +; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: test_bitreverse_v8i64: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24,39,38,37,36,35,34,33,32,47,46,45,44,43,42,41,40,55,54,53,52,51,50,49,48,63,62,61,60,59,58,57,56] +; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: retq %b = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ret <8 x i64> %b } @@ -2277,6 +3223,31 @@ ; XOP: # %bb.0: ; XOP-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] ; XOP-NEXT: retq +; +; GFNISSE-LABEL: fold_bitreverse_v16i8: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: fold_bitreverse_v16i8: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: fold_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: fold_bitreverse_v16i8: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: fold_bitreverse_v16i8: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,64,191,32,223,96,159,16,239,80,175,48,207,112,143] +; GFNIAVX512BW-NEXT: retq %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> ) ret <16 x i8> %b } @@ -2297,6 +3268,32 @@ ; XOP: # %bb.0: ; XOP-NEXT: vmovaps {{.*#+}} ymm0 = [0,65535,16384,49151,8192,57343,24576,40959,4096,61439,20480,45055,12288,53247,28672,36863] ; XOP-NEXT: retq +; +; GFNISSE-LABEL: fold_bitreverse_v16i16: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movaps {{.*#+}} xmm0 = [0,65535,16384,49151,8192,57343,24576,40959] +; GFNISSE-NEXT: movaps {{.*#+}} xmm1 = [4096,61439,20480,45055,12288,53247,28672,36863] +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: fold_bitreverse_v16i16: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,65535,16384,49151,8192,57343,24576,40959,4096,61439,20480,45055,12288,53247,28672,36863] +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: fold_bitreverse_v16i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,65535,16384,49151,8192,57343,24576,40959,4096,61439,20480,45055,12288,53247,28672,36863] +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: fold_bitreverse_v16i16: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vmovaps {{.*#+}} ymm0 = [0,65535,16384,49151,8192,57343,24576,40959,4096,61439,20480,45055,12288,53247,28672,36863] +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: fold_bitreverse_v16i16: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vmovaps {{.*#+}} ymm0 = [0,65535,16384,49151,8192,57343,24576,40959,4096,61439,20480,45055,12288,53247,28672,36863] +; GFNIAVX512BW-NEXT: retq %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> ) ret <16 x i16> %b } @@ -2332,6 +3329,36 @@ ; XOP-NEXT: vmovaps {{.*#+}} ymm0 = [0,4294967295,1073741824,3221225471,536870912,3758096383,1610612736,2684354559] ; XOP-NEXT: vmovaps {{.*#+}} ymm1 = [268435456,4026531839,1342177280,2952790015,805306368,3489660927,1879048192,2415919103] ; XOP-NEXT: retq +; +; GFNISSE-LABEL: fold_bitreverse_v16i32: +; GFNISSE: # %bb.0: +; GFNISSE-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,1073741824,3221225471] +; GFNISSE-NEXT: movaps {{.*#+}} xmm1 = [536870912,3758096383,1610612736,2684354559] +; GFNISSE-NEXT: movaps {{.*#+}} xmm2 = [268435456,4026531839,1342177280,2952790015] +; GFNISSE-NEXT: movaps {{.*#+}} xmm3 = [805306368,3489660927,1879048192,2415919103] +; GFNISSE-NEXT: retq +; +; GFNIAVX-LABEL: fold_bitreverse_v16i32: +; GFNIAVX: # %bb.0: +; GFNIAVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,4294967295,1073741824,3221225471,536870912,3758096383,1610612736,2684354559] +; GFNIAVX-NEXT: vmovaps {{.*#+}} ymm1 = [268435456,4026531839,1342177280,2952790015,805306368,3489660927,1879048192,2415919103] +; GFNIAVX-NEXT: retq +; +; GFNIAVX2-LABEL: fold_bitreverse_v16i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,4294967295,1073741824,3221225471,536870912,3758096383,1610612736,2684354559] +; GFNIAVX2-NEXT: vmovaps {{.*#+}} ymm1 = [268435456,4026531839,1342177280,2952790015,805306368,3489660927,1879048192,2415919103] +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512F-LABEL: fold_bitreverse_v16i32: +; GFNIAVX512F: # %bb.0: +; GFNIAVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,4294967295,1073741824,3221225471,536870912,3758096383,1610612736,2684354559,268435456,4026531839,1342177280,2952790015,805306368,3489660927,1879048192,2415919103] +; GFNIAVX512F-NEXT: retq +; +; GFNIAVX512BW-LABEL: fold_bitreverse_v16i32: +; GFNIAVX512BW: # %bb.0: +; GFNIAVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [0,4294967295,1073741824,3221225471,536870912,3758096383,1610612736,2684354559,268435456,4026531839,1342177280,2952790015,805306368,3489660927,1879048192,2415919103] +; GFNIAVX512BW-NEXT: retq %b = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> ) ret <16 x i32> %b }