Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1132,19 +1132,14 @@
     for (MVT VT : MVT::fp_vector_valuetypes())
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
 
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
-
+    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
+      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
+      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
+      setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
+      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
+      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
+      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
+    }
     setOperationAction(ISD::BR_CC, MVT::i1, Expand);
     setOperationAction(ISD::SETCC, MVT::i1, Custom);
     setOperationAction(ISD::SETCCE, MVT::i1, Custom);
@@ -1246,7 +1241,20 @@
       setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
       setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
       setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
+
+      // FIXME: These instructions are also available on SSE/AVX2; add the relevant patterns.
+      setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
     }
+
     setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
@@ -1433,6 +1441,7 @@
     setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
@@ -1493,6 +1502,15 @@
       setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
       setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
     }
+
+    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
+      setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
+      if (Subtarget.hasVLX()) {
+        // FIXME: These instructions are also available on SSE/AVX2; add the relevant patterns.
+        setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
+        setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
+      }
+    }
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -6380,9 +6380,10 @@
 }
 
 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
-            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
-            X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
-
+            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
+            X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode,
+            bit IsCodeGenOnly>{
+  let isCodeGenOnly = IsCodeGenOnly in {
   defm rr : AVX512_maskable,
@@ -6394,145 +6395,159 @@
                   (DestInfo.VT (LdFrag addr:$src))>, EVEX;
   }
+  }//isCodeGenOnly
 }
 
 // support full register inputs (like SSE paterns)
-multiclass avx512_extend_lowering {
   def : Pat<(To.VT (OpNode (From.VT From.RC:$src))),
             (!cast<Instruction>(NAME#To.ZSuffix#"rr")
               (EXTRACT_SUBREG From.RC:$src, SubRegIdx))>;
 }
 
-multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
   let Predicates = [HasVLX, HasBWI] in {
     defm Z128: avx512_extend_common,
+                   v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v16i8x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasBWI] in {
     defm Z : avx512_extend_common,
+                   v32i8x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
   }
 }
 
-multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
   let Predicates = [HasVLX, HasAVX512] in {
     defm Z128: avx512_extend_common,
+                   v16i8x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
     defm Z : avx512_extend_common,
+                   v16i8x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
   }
 }
 
-multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
   let Predicates = [HasVLX, HasAVX512] in {
     defm Z128: avx512_extend_common,
+                   v16i8x_info, i16mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v16i8x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
     defm Z : avx512_extend_common,
+                   v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
   }
 }
 
-multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
   let Predicates = [HasVLX, HasAVX512] in {
     defm Z128: avx512_extend_common,
+                   v8i16x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v8i16x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
     defm Z : avx512_extend_common,
+                   v16i16x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
   }
 }
 
-multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
   let Predicates = [HasVLX, HasAVX512] in {
     defm Z128: avx512_extend_common,
+                   v8i16x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v8i16x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
     defm Z : avx512_extend_common,
+                   v8i16x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
   }
 }
 
-multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
+              SDPatternOperator OpNode, bit IsCodeGenOnly,
               string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
   let Predicates = [HasVLX, HasAVX512] in {
     defm Z128: avx512_extend_common,
+                   v4i32x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
     defm Z256: avx512_extend_common,
+                   v4i32x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
                    avx512_extend_lowering,
                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
     defm Z : avx512_extend_common,
+                   v8i32x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
   }
 }
 
-defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
-defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
-defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
-defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
-defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
-defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
-
-
-defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
-defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
-defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
-defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
-defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
-defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, 0, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, 0, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, 0, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, 0, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, 0, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, 0, "z">;
+
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, 0, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, 0, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, 0, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, 0, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, 0, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, 0, "s">;
+
+// EXTLOAD patterns, implemented using vpmovz
+defm VPMOVAXBW : avx512_extend_BW<0x30, "vpmovzxbw", null_frag, 1, "">;
+defm VPMOVAXBD : avx512_extend_BD<0x31, "vpmovzxbd", null_frag, 1, "">;
+defm VPMOVAXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", null_frag, 1, "">;
+defm VPMOVAXWD : avx512_extend_WD<0x33, "vpmovzxwd", null_frag, 1, "">;
+defm VPMOVAXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", null_frag, 1, "">;
+defm VPMOVAXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", null_frag, 1, "">;
 
 //===----------------------------------------------------------------------===//
 // GATHER - SCATTER Operations
Index: llvm/trunk/test/CodeGen/X86/avx512-any_extend_load.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-any_extend_load.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-any_extend_load.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+
+
+define void @any_extend_load_v8i64(<8 x i8> * %ptr) {
+; ALL-LABEL: any_extend_load_v8i64:
+; ALL: # BB#0:
+; ALL-NEXT: vpmovzxbq (%rdi), %zmm0
+; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT: vpmovqb %zmm0, (%rdi)
+; ALL-NEXT: retq
+  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
+  %1 = zext <8 x i8> %wide.load to <8 x i64>
+  %2 = add nuw nsw <8 x i64> %1,
+  %3 = xor <8 x i64> %2, zeroinitializer
+  %4 = trunc <8 x i64> %3 to <8 x i8>
+  store <8 x i8> %4, <8 x i8>* %ptr, align 1
+  ret void
+}
+
+define void @any_extend_load_v8i32(<8 x i8> * %ptr) {
+; KNL-LABEL: any_extend_load_v8i32:
+; KNL: # BB#0:
+; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; KNL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: any_extend_load_v8i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbd (%rdi), %ymm0
+; SKX-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; SKX-NEXT: vpmovdb %ymm0, (%rdi)
+; SKX-NEXT: retq
+  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
+  %1 = zext <8 x i8> %wide.load to <8 x i32>
+  %2 = add nuw nsw <8 x i32> %1,
+  %3 = xor <8 x i32> %2, zeroinitializer
+  %4 = trunc <8 x i32> %3 to <8 x i8>
+  store <8 x i8> %4, <8 x i8>* %ptr, align 1
+  ret void
+}
+
+define void @any_extend_load_v8i16(<8 x i8> * %ptr) {
+; KNL-LABEL: any_extend_load_v8i16:
+; KNL: # BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: any_extend_load_v8i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbw (%rdi), %xmm0
+; SKX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpmovwb %xmm0, (%rdi)
+; SKX-NEXT: retq
+  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
+  %1 = zext <8 x i8> %wide.load to <8 x i16>
+  %2 = add nuw nsw <8 x i16> %1,
+  %3 = xor <8 x i16> %2, zeroinitializer
+  %4 = trunc <8 x i16> %3 to <8 x i8>
+  store <8 x i8> %4, <8 x i8>* %ptr, align 1
+  ret void
+}
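
For reference, the VLX-guarded block in X86ISelLowering.cpp also marks v16i8 -> v16i16 extending loads Legal, a case the committed test does not exercise. Below is a minimal IR sketch of such a case, following the same zext/add/xor/trunc shape as the tests above; it assumes an AVX-512 BWI/VLX target such as -mcpu=skx, the function name and the splat constant are illustrative only, and the single vpmovzxbw load mentioned in the comment is an expectation rather than verified llc output.

; Illustrative sketch, not part of the patch: v16i8 -> v16i16 extending load.
; On a BWI/VLX target this is expected to select one vpmovzxbw from memory,
; since only the low byte of each lane survives the final trunc/store.
define void @any_extend_load_v16i16(<16 x i8>* %ptr) {
  %wide.load = load <16 x i8>, <16 x i8>* %ptr, align 1
  %1 = zext <16 x i8> %wide.load to <16 x i16>
  %2 = add nuw nsw <16 x i16> %1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %3 = xor <16 x i16> %2, zeroinitializer
  %4 = trunc <16 x i16> %3 to <16 x i8>
  store <16 x i8> %4, <16 x i8>* %ptr, align 1
  ret void
}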