Summary: give the VEX-encoded PMOVSX/PMOVZX instruction definitions and their selection patterns a per-form predicate (NoVLX_Or_NoBWI for the byte-to-word forms, NoVLX for all others) instead of a blanket NoVLX.
Review note: every pattern in SS41I_pmovx_avx2_patterns selects a 256-bit "Y" instruction, which this patch defines under [HasAVX2, prd]; those patterns are therefore guarded by HasAVX2 (not HasAVX) so the AVX2 requirement of the removed outer `let Predicates = [HasAVX2, NoVLX]` is preserved.
Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -5890,45 +5890,48 @@ multiclass SS41I_pmovx_rm_all opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, OpndItins SSEItins, OpndItins AVXItins, - OpndItins AVX2Itins> { + OpndItins AVX2Itins, Predicate prd> { defm NAME : SS41I_pmovx_rrrm; - let Predicates = [HasAVX, NoVLX] in + let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm, VEX; - let Predicates = [HasAVX2, NoVLX] in + let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm, VEX, VEX_L; } -multiclass SS41I_pmovx_rm opc, string OpcodeStr, - X86MemOperand MemOp, X86MemOperand MemYOp> { +multiclass SS41I_pmovx_rm opc, string OpcodeStr, X86MemOperand MemOp, + X86MemOperand MemYOp, Predicate prd> { defm PMOVSX#NAME : SS41I_pmovx_rm_all; + DEFAULT_ITINS_SHUFFLESCHED, prd>; defm PMOVZX#NAME : SS41I_pmovx_rm_all; + DEFAULT_ITINS_SHUFFLESCHED, prd>; } -defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>; -defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>; -defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>; +defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; +defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>; +defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>; -defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>; -defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>; +defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>; +defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>; -defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>; +defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>; // AVX2 Patterns multiclass SS41I_pmovx_avx2_patterns { // Register-Register patterns + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), (!cast(OpcPrefix#BWYrr) VR128:$src)>; + } + let Predicates = [HasAVX2, NoVLX] 
in { def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))), (!cast(OpcPrefix#BDYrr) VR128:$src)>; def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))), @@ -5941,10 +5944,13 @@ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))), (!cast(OpcPrefix#DQYrr) VR128:$src)>; - + } // On AVX2, we also support 256bit inputs. + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))), (!cast(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + } + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))), (!cast(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>; def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))), @@ -5957,10 +5963,14 @@ def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))), (!cast(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + } // Simple Register-Memory patterns + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (!cast(ExtTy#"extloadvi8") addr:$src)), (!cast(OpcPrefix#BWYrm) addr:$src)>; + } + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (!cast(ExtTy#"extloadvi8") addr:$src)), (!cast(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v4i64 (!cast(ExtTy#"extloadvi8") addr:$src)), @@ -5973,8 +5983,10 @@ def : Pat<(v4i64 (!cast(ExtTy#"extloadvi32") addr:$src)), (!cast(OpcPrefix#DQYrm) addr:$src)>; - + } + // AVX2 Register-Memory patterns + let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BWYrm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), @@ -5983,7 +5995,8 @@ (!cast(OpcPrefix#BWYrm) addr:$src)>; def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BWYrm) addr:$src)>; - + } + let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), @@ -6028,18 +6041,20 @@ (!cast(OpcPrefix#DQYrm) addr:$src)>; def : Pat<(v4i64 
(ExtOp (bc_v4i32 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#DQYrm) addr:$src)>; + } } -let Predicates = [HasAVX2, NoVLX] in { - defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>; - defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>; -} +defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>; +defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>; // SSE4.1/AVX patterns. multiclass SS41I_pmovx_patterns { + let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))), (!cast(OpcPrefix#BWrr) VR128:$src)>; + } + let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))), (!cast(OpcPrefix#BDrr) VR128:$src)>; def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))), @@ -6052,9 +6067,12 @@ def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))), (!cast(OpcPrefix#DQrr) VR128:$src)>; - + } + let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { def : Pat<(v8i16 (!cast(ExtTy#"extloadvi8") addr:$src)), (!cast(OpcPrefix#BWrm) addr:$src)>; + } + let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (!cast(ExtTy#"extloadvi8") addr:$src)), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v2i64 (!cast(ExtTy#"extloadvi8") addr:$src)), @@ -6067,7 +6085,8 @@ def : Pat<(v2i64 (!cast(ExtTy#"extloadvi32") addr:$src)), (!cast(OpcPrefix#DQrm) addr:$src)>; - + } + let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), @@ -6078,7 +6097,8 @@ (!cast(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#BWrm) addr:$src)>; - + } + let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))), @@ -6127,12 +6147,11 @@ (!cast(OpcPrefix#DQrm) 
addr:$src)>; def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))), (!cast(OpcPrefix#DQrm) addr:$src)>; + } } -let Predicates = [HasAVX, NoVLX] in { - defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>; - defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>; -} +defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>; +defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>; let Predicates = [UseSSE41] in { defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>; Index: test/CodeGen/X86/avx-isa-check.ll =================================================================== --- test/CodeGen/X86/avx-isa-check.ll +++ test/CodeGen/X86/avx-isa-check.ll @@ -568,3 +568,10 @@ %shift = shl <8 x i16> %a, ret <8 x i16> %shift } + +define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> + %C = zext <8 x i8> %B to <8 x i16> + ret <8 x i16> %C +}