diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -917,6 +917,13 @@
     return false;
   }
 
+  if (Op.getOpcode() == ISD::ConstantFP) {
+    // We know all of the bits for a floating point constant!
+    Known.One = cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt();
+    Known.Zero = ~Known.One;
+    return false;
+  }
+
   // Other users may use these bits.
   EVT VT = Op.getValueType();
   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
@@ -2254,9 +2261,13 @@
       if (C->isOpaque())
         return false;
     }
-    // TODO: Handle float bits as well.
     if (VT.isInteger())
       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+    if (VT.isFloatingPoint())
+      return TLO.CombineTo(
+          Op,
+          TLO.DAG.getConstantFP(
+              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
   }
 
   return false;
diff --git a/llvm/test/CodeGen/ARM/fcopysign.ll b/llvm/test/CodeGen/ARM/fcopysign.ll
--- a/llvm/test/CodeGen/ARM/fcopysign.ll
+++ b/llvm/test/CodeGen/ARM/fcopysign.ll
@@ -95,8 +95,9 @@
 ; HARD-NEXT: vcvt.f32.f64 s0, d16
 ; HARD-NEXT: vmov.i32 d17, #0x80000000
 ; HARD-NEXT: vshr.u64 d16, d16, #32
-; HARD-NEXT: vmov.f32 s2, #5.000000e-01
-; HARD-NEXT: vbit d1, d16, d17
+; HARD-NEXT: vmov.i32 d18, #0x3f000000
+; HARD-NEXT: vorr d1, d17, d17
+; HARD-NEXT: vbsl d1, d16, d18
 ; HARD-NEXT: vadd.f32 s0, s0, s2
 ; HARD-NEXT: pop {r11, pc}
 entry:
diff --git a/llvm/test/CodeGen/X86/combine-bextr.ll b/llvm/test/CodeGen/X86/combine-bextr.ll
--- a/llvm/test/CodeGen/X86/combine-bextr.ll
+++ b/llvm/test/CodeGen/X86/combine-bextr.ll
@@ -39,12 +39,10 @@
 ; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: movl $3855, %eax # imm = 0xF0F
 ; X32-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movd %eax, %xmm1
-; X32-NEXT: por %xmm0, %xmm1
-; X32-NEXT: subsd %xmm0, %xmm1
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: movd %eax, %xmm0
+; X32-NEXT: por {{\.LCPI.*}}, %xmm0
+; X32-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; X32-NEXT: cvtsd2ss %xmm0, %xmm0
 ; X32-NEXT: movss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
--- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -25,8 +25,7 @@
 define double @mag_neg0_double(double %x) nounwind {
 ; CHECK-LABEL: mag_neg0_double:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call double @copysign(double -0.0, double %x)
   ret double %y
@@ -42,8 +41,7 @@
 ; CHECK-LABEL: mag_pos1_double:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call double @copysign(double 1.0, double %x)
   ret double %y
@@ -87,8 +85,7 @@
 define float @mag_neg0_float(float %x) nounwind {
 ; CHECK-LABEL: mag_neg0_float:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call float @copysignf(float -0.0, float %x)
   ret float %y
@@ -106,8 +103,7 @@
 ; CHECK-LABEL: mag_pos1_float:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %y = call float @copysignf(float 1.0, float %x)
   ret float %y
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -2438,11 +2438,10 @@
 ; X86-SSE: # %bb.0: # %entry
 ; X86-SSE-NEXT: subl $12, %esp
 ; X86-SSE-NEXT: .cfi_def_cfa_offset 16
-; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: orpd %xmm0, %xmm1
-; X86-SSE-NEXT: subsd %xmm0, %xmm1
-; X86-SSE-NEXT: movsd %xmm1, (%esp)
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
 ; X86-SSE-NEXT: fldl (%esp)
 ; X86-SSE-NEXT: wait
 ; X86-SSE-NEXT: addl $12, %esp
@@ -2644,12 +2643,10 @@
 ; X86-SSE: # %bb.0: # %entry
 ; X86-SSE-NEXT: pushl %eax
 ; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: orpd %xmm0, %xmm1
-; X86-SSE-NEXT: subsd %xmm0, %xmm1
-; X86-SSE-NEXT: xorps %xmm0, %xmm0
-; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0
 ; X86-SSE-NEXT: movss %xmm0, (%esp)
 ; X86-SSE-NEXT: flds (%esp)
 ; X86-SSE-NEXT: wait
diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll
--- a/llvm/test/CodeGen/X86/fp-round.ll
+++ b/llvm/test/CodeGen/X86/fp-round.ll
@@ -13,11 +13,10 @@
 ; SSE41: ## %bb.0:
 ; SSE41-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; SSE41-NEXT: andps %xmm0, %xmm1
-; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE41-NEXT: orps %xmm1, %xmm2
-; SSE41-NEXT: addss %xmm0, %xmm2
+; SSE41-NEXT: orps {{.*}}(%rip), %xmm1
+; SSE41-NEXT: addss %xmm0, %xmm1
 ; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: roundss $11, %xmm2, %xmm0
+; SSE41-NEXT: roundss $11, %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: round_f32:
@@ -51,11 +50,10 @@
 ; SSE41: ## %bb.0:
 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
 ; SSE41-NEXT: andpd %xmm0, %xmm1
-; SSE41-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE41-NEXT: orpd %xmm1, %xmm2
-; SSE41-NEXT: addsd %xmm0, %xmm2
+; SSE41-NEXT: orpd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: addsd %xmm0, %xmm1
 ; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: roundsd $11, %xmm2, %xmm0
+; SSE41-NEXT: roundsd $11, %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: round_f64:
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
@@ -487,12 +487,10 @@
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %eax
 ; SSE-X86-NEXT: .cfi_def_cfa_offset 8
-; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: orpd %xmm0, %xmm1
-; SSE-X86-NEXT: subsd %xmm0, %xmm1
-; SSE-X86-NEXT: xorps %xmm0, %xmm0
-; SSE-X86-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; SSE-X86-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0
 ; SSE-X86-NEXT: movss %xmm0, (%esp)
 ; SSE-X86-NEXT: flds (%esp)
 ; SSE-X86-NEXT: wait
@@ -510,10 +508,9 @@
 ; AVX1-X86: # %bb.0:
 ; AVX1-X86-NEXT: pushl %eax
 ; AVX1-X86-NEXT: .cfi_def_cfa_offset 8
-; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1
-; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0
+; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-X86-NEXT: vorpd {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX1-X86-NEXT: vsubsd {{\.LCPI.*}}, %xmm0, %xmm0
 ; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
 ; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
 ; AVX1-X86-NEXT: flds (%esp)
@@ -1166,11 +1163,10 @@
 ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp
 ; SSE-X86-NEXT: andl $-8, %esp
 ; SSE-X86-NEXT: subl $8, %esp
-; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: orpd %xmm0, %xmm1
-; SSE-X86-NEXT: subsd %xmm0, %xmm1
-; SSE-X86-NEXT: movsd %xmm1, (%esp)
+; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; SSE-X86-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; SSE-X86-NEXT: movsd %xmm0, (%esp)
 ; SSE-X86-NEXT: fldl (%esp)
 ; SSE-X86-NEXT: wait
 ; SSE-X86-NEXT: movl %ebp, %esp
@@ -1193,10 +1189,9 @@
 ; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp
 ; AVX1-X86-NEXT: andl $-8, %esp
 ; AVX1-X86-NEXT: subl $8, %esp
-; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1
-; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0
+; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-X86-NEXT: vorpd {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX1-X86-NEXT: vsubsd {{\.LCPI.*}}, %xmm0, %xmm0
 ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
 ; AVX1-X86-NEXT: fldl (%esp)
 ; AVX1-X86-NEXT: wait
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1260,8 +1260,7 @@
 ; X64-SSE-NEXT: pushq %rax
 ; X64-SSE-NEXT: callq __trunctfdf2
 ; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X64-SSE-NEXT: orps %xmm1, %xmm0
+; X64-SSE-NEXT: orps {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT: callq __extenddftf2
 ; X64-SSE-NEXT: addq $8, %rsp
 ; X64-SSE-NEXT: .LBB26_2: # %cleanup
diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
--- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
+++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -32,12 +32,10 @@
 ; SSE2_32-LABEL: u32_to_f:
 ; SSE2_32: # %bb.0:
 ; SSE2_32-NEXT: pushl %eax
-; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2_32-NEXT: orpd %xmm0, %xmm1
-; SSE2_32-NEXT: subsd %xmm0, %xmm1
-; SSE2_32-NEXT: xorps %xmm0, %xmm0
-; SSE2_32-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE2_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2_32-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; SSE2_32-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; SSE2_32-NEXT: cvtsd2ss %xmm0, %xmm0
 ; SSE2_32-NEXT: movss %xmm0, (%esp)
 ; SSE2_32-NEXT: flds (%esp)
 ; SSE2_32-NEXT: popl %eax
@@ -148,11 +146,10 @@
 ; SSE2_32-NEXT: movl %esp, %ebp
 ; SSE2_32-NEXT: andl $-8, %esp
 ; SSE2_32-NEXT: subl $8, %esp
-; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2_32-NEXT: orpd %xmm0, %xmm1
-; SSE2_32-NEXT: subsd %xmm0, %xmm1
-; SSE2_32-NEXT: movsd %xmm1, (%esp)
+; SSE2_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2_32-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; SSE2_32-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; SSE2_32-NEXT: movsd %xmm0, (%esp)
 ; SSE2_32-NEXT: fldl (%esp)
 ; SSE2_32-NEXT: movl %ebp, %esp
 ; SSE2_32-NEXT: popl %ebp
diff --git a/llvm/test/CodeGen/X86/uint_to_fp-2.ll b/llvm/test/CodeGen/X86/uint_to_fp-2.ll
--- a/llvm/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp-2.ll
@@ -6,12 +6,10 @@
 ; CHECK-LABEL: test1:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: orpd %xmm0, %xmm1
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: orpd {{\.LCPI.*}}, %xmm0
+; CHECK-NEXT: subsd {{\.LCPI.*}}, %xmm0
+; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
 ; CHECK-NEXT: movss %xmm0, (%esp)
 ; CHECK-NEXT: flds (%esp)
 ; CHECK-NEXT: popl %eax
@@ -28,9 +26,8 @@
 ; CHECK-NEXT: pushl %eax
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: orps %xmm0, %xmm1
-; CHECK-NEXT: subsd %xmm0, %xmm1
+; CHECK-NEXT: orps {{\.LCPI.*}}, %xmm1
+; CHECK-NEXT: subsd {{\.LCPI.*}}, %xmm1
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
 ; CHECK-NEXT: movss %xmm0, (%esp)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3061,42 +3061,16 @@
 }
 
 define void @PR43024() {
-; SSE2-LABEL: PR43024:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSE2-NEXT: movaps %xmm0, (%rax)
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: movss %xmm1, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: PR43024:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSSE3-NEXT: movaps %xmm0, (%rax)
-; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: xorps %xmm0, %xmm0
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: movss %xmm1, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: PR43024:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSE41-NEXT: movaps %xmm0, (%rax)
-; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: movss %xmm1, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: PR43024:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE-NEXT: movaps %xmm0, (%rax)
+; SSE-NEXT: addss {{.*}}(%rip), %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: movss %xmm0, (%rax)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: PR43024:
 ; AVX: # %bb.0:
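
Note (illustration, not part of the patch): the TargetLowering.cpp change rests on round-tripping a floating-point constant's bits through APInt, since every bit of an FP constant is known. The standalone sketch below shows the two conversions the new code uses: bitcastToAPInt() to get the exact IEEE bit pattern that becomes Known.One/Known.Zero, and the APFloat(semantics, bits) constructor that the new VT.isFloatingPoint() branch uses, via TLO.DAG.getConstantFP() and TLO.DAG.EVTToAPFloatSemantics(VT), to rebuild a simplified constant. It assumes an LLVM tree to compile against; the file and register names here are made up for the demo.

// sketch.cpp -- build against LLVM, e.g.:
//   clang++ sketch.cpp $(llvm-config --cxxflags --ldflags --libs support)
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // All bits of an FP constant are known: bitcastToAPInt() yields the
  // exact IEEE bit pattern, which the new ISD::ConstantFP case assigns
  // to Known.One, with Known.Zero as its complement.
  APFloat Half(0.5f);
  APInt One = Half.bitcastToAPInt(); // 0x3f000000 for 0.5f
  APInt Zero = ~One;

  // The reverse direction used by the constant-fold path: rebuild an FP
  // constant from the known-one bits and the type's float semantics.
  APFloat Rebuilt(APFloat::IEEEsingle(), One);

  outs() << "known-one = 0x" << Twine::utohexstr(One.getZExtValue())
         << ", known-zero = 0x" << Twine::utohexstr(Zero.getZExtValue())
         << ", rebuilt = " << Rebuilt.convertToFloat() << "\n";
  return 0;
}

The 0x3f000000 pattern is the same one visible in the updated ARM fcopysign.ll checks, where the 0.5f magnitude is now materialized as integer bits (vmov.i32 d18, #0x3f000000) instead of an FP constant load.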