Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -448,6 +448,7 @@
     { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          0 },
     { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        0 },
     { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          0 },
+    { X86::CVTDQ2PSrr,      X86::CVTDQ2PSrm,          TB_ALIGN_16 },
     { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm,         TB_ALIGN_16 },
     { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm,         TB_ALIGN_16 },
     { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm,  0 },
@@ -509,6 +510,7 @@
     // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
     { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
     { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
     { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
@@ -526,6 +528,7 @@
     { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         0 },
     { X86::VCVTSS2SI64rr,   X86::VCVTSS2SI64rm,       0 },
     { X86::VCVTSS2SIrr,     X86::VCVTSS2SIrm,         0 },
+    { X86::VCVTDQ2PSrr,     X86::VCVTDQ2PSrm,         0 },
     { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
     { X86::VMOV64toSDrr,    X86::VMOV64toSDrm,        0 },
     { X86::VMOVAPDrr,       X86::VMOVAPDrm,           TB_ALIGN_16 },
@@ -559,6 +562,7 @@
     { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },

     // AVX 256-bit foldable instructions
+    { X86::VCVTDQ2PSYrr,    X86::VCVTDQ2PSYrm,        0 },
     { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
     { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
     { X86::VMOVDQAYrr,      X86::VMOVDQAYrm,          TB_ALIGN_32 },
Index: test/CodeGen/X86/avx1-stack-reload-folding.ll
===================================================================
--- test/CodeGen/X86/avx1-stack-reload-folding.ll
+++ test/CodeGen/X86/avx1-stack-reload-folding.ll
@@ -3,8 +3,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"

-; Function Attrs: nounwind readonly uwtable
-define <32 x double> @_Z14vstack_foldDv32_dS_(<32 x double> %a, <32 x double> %b) #0 {
+define <32 x double> @stack_fold_vmulpd(<32 x double> %a, <32 x double> %b) #0 {
   %1 = fadd <32 x double> %a, %b
   %2 = fsub <32 x double> %a, %b
   %3 = fmul <32 x double> %1, %2
@@ -14,3 +13,15 @@
   ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
 }
+
+define <128 x float> @stack_fold_cvtdq2ps(<128 x i32> %a, <128 x i32> %b, <128 x i32> %c, <128 x i32> %d) #0 {
+  %1 = and <128 x i32> %a, %b
+  %2 = and <128 x i32> %c, %d
+  %3 = sitofp <128 x i32> %1 to <128 x float>
+  %4 = sitofp <128 x i32> %2 to <128 x float>
+  %5 = fadd <128 x float> %3, %4
+  ret <128 x float> %5
+
+  ;CHECK-NOT: vmovaps {{.*#+}} 32-byte Reload
+  ;CHECK: vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+}