diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46813,16 +46813,19 @@ return DAG.getBitcast(VT, FPLogic); } - // The vector ISA for FP predicates is incomplete before AVX, so converting - // COMIS* to CMPS* may not be a win before AVX. - // TODO: Check types/predicates to see if they are available with SSE/SSE2. - if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || - !N0.hasOneUse() || !N1.hasOneUse()) + if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() || + !N1.hasOneUse()) return SDValue(); ISD::CondCode CC0 = cast(N0.getOperand(2))->get(); ISD::CondCode CC1 = cast(N1.getOperand(2))->get(); + // The vector ISA for FP predicates is incomplete before AVX, so converting + // COMIS* to CMPS* may not be a win before AVX. + if (!Subtarget.hasAVX() && + !(cheapX86FSETCC_SSE(CC0) && cheapX86FSETCC_SSE(CC1))) + return SDValue(); + // Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*) // and vector logic: // logic (setcc N00, N01), (setcc N10, N11) --> diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll --- a/llvm/test/CodeGen/X86/fcmp-logic.ll +++ b/llvm/test/CodeGen/X86/fcmp-logic.ll @@ -6,11 +6,11 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) { ; SSE2-LABEL: olt_ole_and_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: ucomiss %xmm0, %xmm1 -; SSE2-NEXT: seta %cl -; SSE2-NEXT: ucomiss %xmm2, %xmm3 -; SSE2-NEXT: setae %al -; SSE2-NEXT: andb %cl, %al +; SSE2-NEXT: cmpleps %xmm3, %xmm2 +; SSE2-NEXT: cmpltps %xmm1, %xmm0 +; SSE2-NEXT: andps %xmm2, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: olt_ole_and_f32: @@ -43,13 +43,11 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) { ; SSE2-LABEL: oge_oeq_or_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: ucomiss %xmm1, %xmm0 -; SSE2-NEXT: setae %cl -; SSE2-NEXT: ucomiss %xmm3, %xmm2 -; SSE2-NEXT: setnp %dl -; SSE2-NEXT: sete %al -; SSE2-NEXT: andb %dl, %al -; SSE2-NEXT: orb %cl, %al +; SSE2-NEXT: cmpeqps %xmm3, %xmm2 +; SSE2-NEXT: cmpleps %xmm0, %xmm1 +; SSE2-NEXT: orps %xmm2, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: oge_oeq_or_f32: @@ -121,13 +119,11 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: une_ugt_and_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: ucomisd %xmm1, %xmm0 -; SSE2-NEXT: setp %al -; SSE2-NEXT: setne %cl -; SSE2-NEXT: orb %al, %cl -; SSE2-NEXT: ucomisd %xmm2, %xmm3 -; SSE2-NEXT: setb %al -; SSE2-NEXT: andb %cl, %al +; SSE2-NEXT: cmpnlepd %xmm3, %xmm2 +; SSE2-NEXT: cmpneqpd %xmm1, %xmm0 +; SSE2-NEXT: andpd %xmm2, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: une_ugt_and_f64: @@ -160,11 +156,11 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: ult_uge_or_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: ucomisd %xmm1, %xmm0 -; SSE2-NEXT: setb %cl -; SSE2-NEXT: ucomisd %xmm2, %xmm3 -; SSE2-NEXT: setbe %al -; SSE2-NEXT: orb %cl, %al +; SSE2-NEXT: cmpnltpd %xmm3, %xmm2 +; SSE2-NEXT: cmpnlepd %xmm0, %xmm1 +; SSE2-NEXT: orpd %xmm2, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: ult_uge_or_f64: @@ -198,13 +194,11 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: une_uno_xor_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: ucomisd %xmm1, %xmm0 -; SSE2-NEXT: setp %al -; SSE2-NEXT: setne %cl -; SSE2-NEXT: orb %al, %cl -; SSE2-NEXT: ucomisd %xmm3, %xmm2 -; SSE2-NEXT: setp %al -; SSE2-NEXT: xorb %cl, %al +; SSE2-NEXT: cmpunordpd %xmm3, %xmm2 +; SSE2-NEXT: cmpneqpd %xmm1, %xmm0 +; SSE2-NEXT: xorpd %xmm2, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: une_uno_xor_f64: @@ -337,11 +331,11 @@ ; SSE2-LABEL: f32cmp3: ; SSE2: # %bb.0: ; SSE2-NEXT: xorps %xmm4, %xmm4 -; SSE2-NEXT: ucomiss %xmm4, %xmm0 -; SSE2-NEXT: seta %al -; SSE2-NEXT: ucomiss %xmm4, %xmm1 -; SSE2-NEXT: seta %cl -; SSE2-NEXT: orb %al, %cl +; SSE2-NEXT: xorps %xmm5, %xmm5 +; SSE2-NEXT: cmpltps %xmm1, %xmm5 +; SSE2-NEXT: cmpltps %xmm0, %xmm4 +; SSE2-NEXT: orps %xmm5, %xmm4 +; SSE2-NEXT: movd %xmm4, %ecx ; SSE2-NEXT: ucomiss %xmm2, %xmm3 ; SSE2-NEXT: seta %al ; SSE2-NEXT: xorb %cl, %al diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll --- a/llvm/test/CodeGen/X86/pr40539.ll +++ b/llvm/test/CodeGen/X86/pr40539.ll @@ -40,20 +40,22 @@ ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: #APP ; CHECK-NEXT: fcos ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; CHECK-NEXT: setae %al -; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: cmpleps %xmm1, %xmm0 +; CHECK-NEXT: cmpleps %xmm2, %xmm1 +; CHECK-NEXT: andps %xmm0, %xmm1 +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl