diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32994,11 +32994,6 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { unsigned Bits = Ty->getScalarSizeInBits(); - // 8-bit shifts are always expensive, but versions with a scalar amount aren't - // particularly cheaper than those without. - if (Bits == 8) - return false; - // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts. // Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred. if (Subtarget.hasXOP() && diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -87,12 +87,39 @@ } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; %splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -85,12 +85,39 @@ } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -8,14 +8,34 @@ target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -8,14 +8,34 @@ target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false