Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -1641,7 +1641,7 @@ multiclass avx512_move_scalar { - let hasSideEffects = 0 in { + let hasSideEffects = 0, AddedComplexity = 30 in { def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128X:$dst, (vt (OpNode VR128X:$src1, @@ -3418,7 +3418,8 @@ (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))), (VCVTPH2PSZrr VR256X:$src)>; -let Defs = [EFLAGS], Predicates = [HasAVX512] in { +let Defs = [EFLAGS], Predicates = [HasAVX512], + AddedComplexity = 10 in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; Index: lib/Target/X86/X86InstrFormats.td =================================================================== --- lib/Target/X86/X86InstrFormats.td +++ lib/Target/X86/X86InstrFormats.td @@ -410,7 +410,7 @@ list pattern, InstrItinClass itin = NoItinerary> : I { let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512], - !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX], + !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX], !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1], !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2], !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2], @@ -427,7 +427,7 @@ list pattern, InstrItinClass itin = NoItinerary> : Ii8 { let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512], - !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX], + !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX], !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1], [UseSSE2]))); Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -1850,14 +1850,14 @@ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { @@ -1935,14 +1935,14 @@ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, @@ -5063,7 +5063,7 @@ //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // -let Predicates = [UseAVX] in +let Predicates = [HasAVX] in def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), (VMOVPQI2QImr addr:$dst, VR128:$src)>; let Predicates = [UseSSE2] in Index: test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86.ll +++ test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s --check-prefix CHECK --check-prefix AVX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=skx | FileCheck %s --check-prefix CHECK --check-prefix SKX define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vaesdec @@ -2192,7 +2193,8 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions - ; CHECK: vmovups + ; AVX: vmovups + ; SKX: vmovdqu ; add operation forces the execution domain. %a2 = add <32 x i8> %a1, call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)