Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -72,6 +72,10 @@ // FP scalar memory operand for intrinsics - ssmem/sdmem. Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast("ssmem"), !if (!eq (EltTypeName, "f64"), !cast("sdmem"), ?)); + ComplexPattern IntLoadScalarPat = !if (!eq (EltTypeName, "f32"), + !cast("sse_load_f32"), + !if (!eq (EltTypeName, "f64"), + !cast("sse_load_f64"), ?)); // Load patterns // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64 @@ -4143,10 +4147,10 @@ itins.rr>; defm rm_Int : AVX512_maskable_scalar; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { @@ -5510,7 +5514,7 @@ "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base; defm m_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; defm rb_Int: AVX512_maskable_3src_scalar, VEX, VEX_L, VEX_WIG; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), + (ins i256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)], @@ -8385,7 +8385,8 @@ // multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT, X86FoldableSchedWrite Sched> { + ValueType OpVT, X86FoldableSchedWrite Sched, + X86MemOperand memOp> { let Predicates = [HasAVX2, NoVLX] in { def Yrr : AVX28I, Sched<[Sched]>, VEX_4V, VEX_L; def Yrm : AVX28I; +defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256, + i256mem>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>; +defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256, + f256mem>; multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT, X86FoldableSchedWrite Sched> { + ValueType OpVT, X86FoldableSchedWrite Sched, + X86MemOperand memOp> { let Predicates = [HasAVX2, NoVLX] in { def Yri : AVX2AIi8, Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8, VEX_W; + WriteShuffle256, i256mem>, VEX_W; let ExeDomain = SSEPackedDouble in defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, - WriteFShuffle256>, VEX_W; + WriteFShuffle256, f256mem>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks