Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -7690,12 +7690,16 @@ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, + { X86::FsANDNPSrr, X86::FsANDNPDrr,X86::PANDNrr }, { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, + { X86::FsANDPSrr, X86::FsANDPDrr, X86::PANDrr }, { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, + { X86::FsORPSrr, X86::FsORPDrr, X86::PORrr }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, + { X86::FsXORPSrr, X86::FsXORPDrr, X86::PXORrr }, // AVX 128-bit support { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, @@ -7706,12 +7710,16 @@ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, + { X86::VFsANDNPSrr,X86::VFsANDNPDrr,X86::VPANDNrr }, { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, + { X86::VFsANDPSrr, X86::VFsANDPDrr, X86::VPANDrr }, { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, + { X86::VFsORPSrr, X86::VFsORPDrr, X86::VPORrr }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, + { X86::VFsXORPSrr, X86::VFsXORPDrr, X86::VPXORrr }, // AVX 256-bit support { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, Index: test/CodeGen/X86/fast-isel-select-sse.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-sse.ll +++ test/CodeGen/X86/fast-isel-select-sse.ll @@ -30,9 +30,9 @@ ; SSE-LABEL: select_fcmp_oeq_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpeqsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oeq_f64: @@ -71,10 +71,10 @@ ; SSE-LABEL: select_fcmp_ogt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpltsd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f64: @@ -113,10 +113,10 @@ ; SSE-LABEL: select_fcmp_oge_f64: ; SSE: # BB#0: ; SSE-NEXT: cmplesd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f64: @@ -154,9 +154,9 @@ ; SSE-LABEL: select_fcmp_olt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpltsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_olt_f64: @@ -194,9 +194,9 @@ ; SSE-LABEL: select_fcmp_ole_f64: ; SSE: # BB#0: ; SSE-NEXT: cmplesd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ole_f64: @@ -234,9 +234,9 @@ ; SSE-LABEL: select_fcmp_ord_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpordsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ord_f64: @@ -274,9 +274,9 @@ ; SSE-LABEL: select_fcmp_uno_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpunordsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_uno_f64: @@ -314,9 +314,9 @@ ; SSE-LABEL: select_fcmp_ugt_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnlesd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ugt_f64: @@ -354,9 +354,9 @@ ; SSE-LABEL: select_fcmp_uge_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnltsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_uge_f64: @@ -395,10 +395,10 @@ ; SSE-LABEL: select_fcmp_ult_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnlesd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f64: @@ -437,10 +437,10 @@ ; SSE-LABEL: select_fcmp_ule_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpnltsd %xmm0, %xmm1 -; SSE-NEXT: andpd %xmm1, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm2, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm1 +; SSE-NEXT: orps %xmm2, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f64: @@ -478,9 +478,9 @@ ; SSE-LABEL: select_fcmp_une_f64: ; SSE: # BB#0: ; SSE-NEXT: cmpneqsd %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: andnpd %xmm3, %xmm0 -; SSE-NEXT: orpd %xmm2, %xmm0 +; SSE-NEXT: andps %xmm0, %xmm2 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_une_f64: Index: test/CodeGen/X86/fp-logic-replace.ll =================================================================== --- test/CodeGen/X86/fp-logic-replace.ll +++ test/CodeGen/X86/fp-logic-replace.ll @@ -3,20 +3,20 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX ; Test that we can replace "scalar" FP-bitwise-logic with the optimal instruction. -; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels +; Scalar x86 FP-logic instructions only exist in your imagination and/or the bowels ; of compilers, but float and double variants of FP-logic instructions are reality -; and float may be a shorter instruction depending on which flavor of vector ISA -; you have...so just prefer float all the time, ok? Yay, x86! +; and float may be a shorter instruction depending on which flavor of vector ISA +; you have...so just prefer float all the time, ok? Yay, x86! define double @FsANDPSrr(double %x, double %y) { ; SSE-LABEL: FsANDPSrr: ; SSE: # BB#0: -; SSE-NEXT: andpd %xmm1, %xmm0 +; SSE-NEXT: andps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsANDPSrr: ; AVX: # BB#0: -; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 @@ -56,12 +56,12 @@ define double @FsORPSrr(double %x, double %y) { ; SSE-LABEL: FsORPSrr: ; SSE: # BB#0: -; SSE-NEXT: orpd %xmm1, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsORPSrr: ; AVX: # BB#0: -; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 @@ -74,12 +74,12 @@ define double @FsXORPSrr(double %x, double %y) { ; SSE-LABEL: FsXORPSrr: ; SSE: # BB#0: -; SSE-NEXT: xorpd %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsXORPSrr: ; AVX: # BB#0: -; AVX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 Index: test/CodeGen/X86/fp-logic.ll =================================================================== --- test/CodeGen/X86/fp-logic.ll +++ test/CodeGen/X86/fp-logic.ll @@ -223,7 +223,7 @@ define double @doubles(double %x, double %y) { ; CHECK-LABEL: doubles: ; CHECK: # BB#0: -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %bc1 = bitcast double %x to i64 Index: test/CodeGen/X86/fp-select-cmp-and.ll =================================================================== --- test/CodeGen/X86/fp-select-cmp-and.ll +++ test/CodeGen/X86/fp-select-cmp-and.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: test1: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm2, %xmm0 -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp olt double %a, %eps @@ -17,7 +17,7 @@ ; CHECK-LABEL: test2: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm2, %xmm0 -; CHECK-NEXT: andpd %xmm1, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ole double %a, %eps @@ -29,8 +29,8 @@ ; CHECK-LABEL: test3: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm0, %xmm2 -; CHECK-NEXT: andpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ogt double %a, %eps @@ -42,8 +42,8 @@ ; CHECK-LABEL: test4: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm2 -; CHECK-NEXT: andpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps @@ -55,7 +55,7 @@ ; CHECK-LABEL: test5: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm2, %xmm0 -; CHECK-NEXT: andnpd %xmm1, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp olt double %a, %eps @@ -67,7 +67,7 @@ ; CHECK-LABEL: test6: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm2, %xmm0 -; CHECK-NEXT: andnpd %xmm1, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ole double %a, %eps @@ -79,8 +79,8 @@ ; CHECK-LABEL: test7: ; CHECK: # BB#0: ; CHECK-NEXT: cmpltsd %xmm0, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp ogt double %a, %eps @@ -92,8 +92,8 @@ ; CHECK-LABEL: test8: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm2 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps @@ -220,10 +220,10 @@ ; CHECK-LABEL: test18: ; CHECK: # BB#0: ; CHECK-NEXT: cmplesd %xmm0, %xmm3 -; CHECK-NEXT: andpd %xmm3, %xmm2 -; CHECK-NEXT: andnpd %xmm1, %xmm3 -; CHECK-NEXT: orpd %xmm2, %xmm3 -; CHECK-NEXT: movapd %xmm3, %xmm0 +; CHECK-NEXT: andps %xmm3, %xmm2 +; CHECK-NEXT: andnps %xmm1, %xmm3 +; CHECK-NEXT: orps %xmm2, %xmm3 +; CHECK-NEXT: movaps %xmm3, %xmm0 ; CHECK-NEXT: retq ; %cmp = fcmp oge double %a, %eps Index: test/CodeGen/X86/sse-minmax.ll =================================================================== --- test/CodeGen/X86/sse-minmax.ll +++ test/CodeGen/X86/sse-minmax.ll @@ -84,11 +84,11 @@ define double @oge(double %x, double %y) { ; STRICT-LABEL: oge: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmplesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: oge: @@ -104,12 +104,12 @@ define double @ole(double %x, double %y) { ; STRICT-LABEL: ole: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmplesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm0, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ole: @@ -125,12 +125,12 @@ define double @oge_inverse(double %x, double %y) { ; STRICT-LABEL: oge_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmplesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: oge_inverse: @@ -152,12 +152,12 @@ define double @ole_inverse(double %x, double %y) { ; STRICT-LABEL: ole_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmplesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ole_inverse: @@ -257,7 +257,7 @@ ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmplesd %xmm0, %xmm1 -; STRICT-NEXT: andpd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: oge_x: @@ -275,10 +275,10 @@ ; STRICT-LABEL: ole_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmplesd %xmm2, %xmm1 -; STRICT-NEXT: andpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ole_x: @@ -297,8 +297,8 @@ ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmplesd %xmm0, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: oge_inverse_x: @@ -323,10 +323,10 @@ ; STRICT-LABEL: ole_inverse_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmplesd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ole_inverse_x: @@ -350,12 +350,12 @@ define double @ugt(double %x, double %y) { ; STRICT-LABEL: ugt: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm0, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ugt: @@ -371,11 +371,11 @@ define double @ult(double %x, double %y) { ; STRICT-LABEL: ult: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm0 -; STRICT-NEXT: andnpd %xmm1, %xmm2 -; STRICT-NEXT: orpd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm0 +; STRICT-NEXT: andnps %xmm1, %xmm2 +; STRICT-NEXT: orps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ult: @@ -391,12 +391,12 @@ define double @ugt_inverse(double %x, double %y) { ; STRICT-LABEL: ugt_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm0, %xmm2 +; STRICT-NEXT: movaps %xmm0, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm1, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ugt_inverse: @@ -418,12 +418,12 @@ define double @ult_inverse(double %x, double %y) { ; STRICT-LABEL: ult_inverse: ; STRICT: # BB#0: -; STRICT-NEXT: movapd %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm1, %xmm2 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm2 -; STRICT-NEXT: andpd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm2 -; STRICT-NEXT: orpd %xmm1, %xmm2 -; STRICT-NEXT: movapd %xmm2, %xmm0 +; STRICT-NEXT: andps %xmm2, %xmm1 +; STRICT-NEXT: andnps %xmm0, %xmm2 +; STRICT-NEXT: orps %xmm1, %xmm2 +; STRICT-NEXT: movaps %xmm2, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ult_inverse: @@ -524,10 +524,10 @@ ; STRICT-LABEL: ugt_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm2, %xmm1 -; STRICT-NEXT: andpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ugt_x: @@ -546,7 +546,7 @@ ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm1 -; STRICT-NEXT: andpd %xmm1, %xmm0 +; STRICT-NEXT: andps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; RELAX-LABEL: ult_x: @@ -564,10 +564,10 @@ ; STRICT-LABEL: ugt_inverse_x: ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm2, %xmm2 -; STRICT-NEXT: movapd %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm0, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm2, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ugt_inverse_x: @@ -593,8 +593,8 @@ ; STRICT: # BB#0: ; STRICT-NEXT: xorps %xmm1, %xmm1 ; STRICT-NEXT: cmpnlesd %xmm0, %xmm1 -; STRICT-NEXT: andnpd %xmm0, %xmm1 -; STRICT-NEXT: movapd %xmm1, %xmm0 +; STRICT-NEXT: andnps %xmm0, %xmm1 +; STRICT-NEXT: movaps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: ult_inverse_x: