Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td
@@ -488,8 +488,6 @@
               Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@@ -1725,12 +1723,8 @@
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
         Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-  def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
-        Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
   def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
         Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
-  def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
-        Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
 }
 
 // Vector bit test
Index: llvm/trunk/lib/IR/AutoUpgrade.cpp
===================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp
@@ -185,6 +185,8 @@
         Name == "x86.sse2.cvtps2pd" ||
         Name == "x86.avx.cvtdq2.pd.256" ||
         Name == "x86.avx.cvt.ps2.pd.256" ||
+        Name == "x86.sse2.cvttps2dq" ||
+        Name.startswith("x86.avx.cvtt.") ||
         Name.startswith("x86.avx.vinsertf128.") ||
         Name == "x86.avx2.vinserti128" ||
         Name.startswith("x86.avx.vextractf128.") ||
@@ -498,6 +500,12 @@
         Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
       else
         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+    } else if (Name == "llvm.x86.sse2.cvttps2dq" ||
+               Name.startswith("llvm.x86.avx.cvtt.")) {
+      // Truncation (round to zero) float/double to i32 vector conversion.
+      Value *Src = CI->getArgOperand(0);
+      VectorType *DstTy = cast<VectorType>(CI->getType());
+      Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
     } else if (Name.startswith("llvm.x86.avx.movnt.")) {
       Module *M = F->getParent();
       SmallVector<Metadata *, 1> Elts;
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -2013,35 +2013,24 @@
 // SSE2 packed instructions with XS prefix
 def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst,
-                           (int_x86_sse2_cvttps2dq VR128:$src))],
-                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+                         [], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
 def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
-                                            (loadv4f32 addr:$src)))],
-                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+                         [], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
 def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
-                          [(set VR256:$dst,
-                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
-                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+                          [], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
 def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
-                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
-                                             (loadv8f32 addr:$src)))],
-                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+                          [], IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
                           Sched<[WriteCvtF2ILd]>;
 
 def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
-                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
+                       [], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
 def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
-                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
+                       [], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
 
 let Predicates = [HasAVX] in {
   def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
@@ -2111,14 +2100,10 @@
 // YMM only
 def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst,
-                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
-                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+                         [], IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst,
-                          (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
-                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+                         [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
 def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                 (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
 
Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -675,11 +675,10 @@
 ; X64-NEXT:    vcvttpd2dqy %ymm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
-  %cvt = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
+  %cvt = fptosi <4 x double> %a0 to <4 x i32>
   %res = bitcast <4 x i32> %cvt to <2 x i64>
   ret <2 x i64> %res
 }
-declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
 
 define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind {
 ; X32-LABEL: test_mm256_cvttps_epi32:
@@ -691,11 +690,10 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    vcvttps2dq %ymm0, %ymm0
 ; X64-NEXT:    retq
-  %cvt = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
+  %cvt = fptosi <8 x float> %a0 to <8 x i32>
   %res = bitcast <8 x i32> %cvt to <4 x i64>
   ret <4 x i64> %res
 }
-declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
 
 define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
 ; X32-LABEL: test_mm256_div_pd:
Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -357,12 +357,35 @@
 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
 
 
+define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
+  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
+; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT:    retl
+  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
+
+
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
   ; add operation forces the execution domain.
 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    vpaddb LCPI32_0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddb LCPI34_0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
 ; CHECK-NEXT:    retl
   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -3407,39 +3407,6 @@
 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
 
 
-define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
-; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
-; AVX-NEXT:    vzeroupper
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvttpd2dqy %ymm0, %xmm0
-; AVX512VL-NEXT:    retl
-  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
-; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
-; AVX:       ## BB#0:
-; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
-; AVX-NEXT:    retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
-; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
-; AVX512VL-NEXT:    retl
-  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
-
-
 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
 ; AVX-LABEL: test_x86_avx_dp_ps_256:
 ; AVX:       ## BB#0:
@@ -4133,7 +4100,7 @@
 ;
 ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpermilpd LCPI233_0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpermilpd LCPI231_0, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retl
   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
@@ -4625,7 +4592,7 @@
 ; AVX-LABEL: movnt_dq:
 ; AVX:       ## BB#0:
 ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT:    vpaddq LCPI260_0, %xmm0, %xmm0
+; AVX-NEXT:    vpaddq LCPI258_0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovntdq %ymm0, (%eax)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retl
@@ -4633,7 +4600,7 @@
 ; AVX512VL-LABEL: movnt_dq:
 ; AVX512VL:       ## BB#0:
 ; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT:    vpaddq LCPI260_0, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpaddq LCPI258_0, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovntdq %ymm0, (%eax)
 ; AVX512VL-NEXT:    retl
   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1280,11 +1280,10 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    cvttps2dq %xmm0, %xmm0
 ; X64-NEXT:    retq
-  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
+  %res = fptosi <4 x float> %a0 to <4 x i32>
   %bc = bitcast <4 x i32> %res to <2 x i64>
   ret <2 x i64> %bc
 }
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
 
 define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
 ; X32-LABEL: test_mm_cvttsd_si32:
Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -84,6 +84,17 @@
 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
 
 
+define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_sse2_cvttps2dq:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_x86_sse2_storel_dq:
 ; CHECK:       ## BB#0:
@@ -101,7 +112,7 @@
 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    paddb LCPI7_0, %xmm0
+; CHECK-NEXT:    paddb LCPI8_0, %xmm0
 ; CHECK-NEXT:    movdqu %xmm0, (%eax)
 ; CHECK-NEXT:    retl
   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -322,22 +322,6 @@
 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
 
 
-define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse2_cvttps2dq:
-; SSE:       ## BB#0:
-; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_cvttps2dq:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vcvttps2dq %xmm0, %xmm0
-; KNL-NEXT:    retl
-  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
-
-
 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvttsd2si:
 ; SSE:       ## BB#0: