diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19158,8 +19158,18 @@ if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 && Op->getSimpleValueType(0) == MVT::v4f64) { SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V); - SDValue VBias = - DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v4f64); + Constant *Bias = ConstantFP::get( + *DAG.getContext(), + APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL))); + auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, /*Alignment*/ 8); + SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other); + SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; + SDValue VBias = DAG.getMemIntrinsicNode( + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + /*Alignment*/ 8, MachineMemOperand::MOLoad); + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn, DAG.getBitcast(MVT::v4i64, VBias)); Or = DAG.getBitcast(MVT::v4f64, Or); diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -588,7 +588,7 @@ ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: ret{{[l|q]}} diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -722,12 +722,22 @@ ; SSE41-NEXT: subpd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_4i32_to_2f64: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; VEX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_4i32_to_2f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: # xmm1 = mem[0,0] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_4i32_to_2f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_4i32_to_2f64: ; AVX512F: # %bb.0: @@ -1016,7 +1026,7 @@ ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -3456,15 +3466,18 @@ ; AVX1-LABEL: uitofp_load_4i32_to_2f64_2: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: # xmm1 = mem[0,0] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: uitofp_load_4i32_to_2f64_2: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -3521,15 +3534,18 @@ ; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: # xmm1 = mem[0,0] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -3778,7 +3794,7 @@ ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -7272,7 +7272,7 @@ ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] ; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq