Summary (free-form text ahead of the first "Index:" header):
  * lib/Target/X86/X86InstrInfo.cpp: the VCVTTPD2DQrr and VCVTTPS2DQrr
    register->memory folding entries are removed from their old position
    (old lines ~930-937) and re-added next to the other VCVT* entries
    (lines ~526-533). New VCVTTPD2DQYrr / VCVTTPS2DQYrr entries are added under
    the "AVX 256-bit foldable instructions" comment.
  * test/CodeGen/X86/avx1-stack-reload-folding.ll: two new functions,
    stack_fold_cvttpd2dq and stack_fold_cvttps2dq, whose CHECK lines expect a
    "32-byte Folded Reload" on the convert and no stand-alone
    vmovapd/vmovaps "32-byte Reload".
  NOTE(review): this patch was recovered from a copy whose line breaks had been
  collapsed; indentation inside each record is reconstructed, so the context
  lines may need whitespace-insensitive matching (e.g. `patch -l`) -- confirm
  against the target files before applying.

Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -526,6 +526,8 @@
     { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
     { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
     { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+    { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+    { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
     { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
     { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
     { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -559,6 +561,8 @@
     { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
 
     // AVX 256-bit foldable instructions
+    { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
+    { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
     { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
     { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
     { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
@@ -930,8 +934,6 @@
     { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
     { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
     { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
-    { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
-    { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
     { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
     { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
     { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
Index: test/CodeGen/X86/avx1-stack-reload-folding.ll
===================================================================
--- test/CodeGen/X86/avx1-stack-reload-folding.ll
+++ test/CodeGen/X86/avx1-stack-reload-folding.ll
@@ -14,3 +14,29 @@
   ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
 }
+
+define <64 x i32> @stack_fold_cvttpd2dq(<64 x double> %a, <64 x double> %b) #0 {
+  %1 = fadd <64 x double> %a, %b
+  %2 = fsub <64 x double> %a, %b
+  %3 = fptosi <64 x double> %1 to <64 x i32>
+  %4 = fptosi <64 x double> %2 to <64 x i32>
+  %5 = or <64 x i32> %3, %4
+  ret <64 x i32> %5
+
+  ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
+  ;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
+}
+
+define <64 x i32> @stack_fold_cvttps2dq(<64 x float> %a, <64 x float> %b) #0 {
+  %1 = fadd <64 x float> %a, %b
+  %2 = fsub <64 x float> %a, %b
+  %3 = fptosi <64 x float> %1 to <64 x i32>
+  %4 = fptosi <64 x float> %2 to <64 x i32>
+  %5 = or <64 x i32> %3, %4
+  ret <64 x i32> %5
+
+  ;CHECK-NOT: vmovaps {{.*#+}} 32-byte Reload
+  ;CHECK: vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  ;CHECK-NOT: vmovaps {{.*#+}} 32-byte Reload
+}