Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -514,6 +514,12 @@
   SDLoc loc(Op);
   SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
+
+  // Add an assert to inform the optimizers about the original bit width.
+  promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+                                                             : ISD::AssertSext,
+                         SDLoc(Op), NewVT, promoted,
+                         DAG.getValueType(VT.getScalarType()));
   return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
 }
Index: test/CodeGen/X86/avx-cvt-2.ll
===================================================================
--- test/CodeGen/X86/avx-cvt-2.ll
+++ test/CodeGen/X86/avx-cvt-2.ll
@@ -12,10 +12,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:  vcvttps2dq %ymm0, %ymm0
 ; CHECK-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:  vpackusdw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:  vmovdqa %xmm0, (%rdi)
 ; CHECK-NEXT:  vzeroupper
 ; CHECK-NEXT:  retq
@@ -29,10 +26,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:  vcvttps2dq %ymm0, %ymm0
 ; CHECK-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:  vpackssdw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:  vmovdqa %xmm0, (%rdi)
 ; CHECK-NEXT:  vzeroupper
 ; CHECK-NEXT:  retq
@@ -46,10 +40,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:  vcvttps2dq %ymm0, %ymm0
 ; CHECK-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:  vpackusdw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:  vpackuswb %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:  vmovq %xmm0, (%rdi)
 ; CHECK-NEXT:  vzeroupper
@@ -64,10 +55,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:  vcvttps2dq %ymm0, %ymm0
 ; CHECK-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:  vpackssdw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:  vpacksswb %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:  vmovq %xmm0, (%rdi)
 ; CHECK-NEXT:  vzeroupper
Index: test/CodeGen/X86/vec_cast2.ll
===================================================================
--- test/CodeGen/X86/vec_cast2.ll
+++ test/CodeGen/X86/vec_cast2.ll
@@ -88,10 +88,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:  vcvttps2dq %ymm0, %ymm0
 ; CHECK-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:  vpackssdw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:  vzeroupper
 ; CHECK-NEXT:  retl
 ;