Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -3589,7 +3589,7 @@ let ExeDomain = SSEPackedInt in def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), + (ins i128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], @@ -3609,7 +3609,7 @@ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), + (ins i256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)], @@ -8383,7 +8383,8 @@ // multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT, X86FoldableSchedWrite Sched> { + ValueType OpVT, X86FoldableSchedWrite Sched, + X86MemOperand memOp> { let Predicates = [HasAVX2, NoVLX] in { def Yrr : AVX28I, Sched<[Sched]>, VEX_4V, VEX_L; def Yrm : AVX28I; +defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256, + i256mem>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>; +defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256, + f256mem>; multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT, X86FoldableSchedWrite Sched> { + ValueType OpVT, X86FoldableSchedWrite Sched, + X86MemOperand memOp> { let Predicates = [HasAVX2, NoVLX] in { def Yri : AVX2AIi8, Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8, VEX_W; + WriteShuffle256, i256mem>, VEX_W; let ExeDomain = SSEPackedDouble in defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, - WriteFShuffle256>, VEX_W; + WriteFShuffle256, f256mem>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks