Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1144,6 +1144,18 @@
     setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
     setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
     setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+
+    if (Subtarget->hasFullFP16()) {
+      setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
+      setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
+
+      setOperationAction(ISD::FMINNAN, MVT::v4f16, Legal);
+      setOperationAction(ISD::FMAXNAN, MVT::v4f16, Legal);
+      setOperationAction(ISD::FMINNAN, MVT::v8f16, Legal);
+      setOperationAction(ISD::FMAXNAN, MVT::v8f16, Legal);
+    }
   }
 
   // We have target-specific dag combine patterns for the following nodes:
Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -782,43 +782,59 @@
   ret <8 x half> %vmaxq_v2.i
 }
 
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
-;  ret <4 x half> %vmaxnm_v2.i
-;}
+define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmaxnm_f16:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vmaxnm_v2.i
+}
 
-;define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
-;  ret <8 x half> %vmaxnmq_v2.i
-;}
+define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vmaxnmq_f16:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vmaxnmq_v2.i
+}
 
-;define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
-;  ret <4 x half> %vmin_v2.i
-;}
+define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmin_f16:
+; CHECK: vmin.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vmin_v2.i
+}
 
-;define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
-;  ret <8 x half> %vminq_v2.i
-;}
+define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vminq_f16:
+; CHECK: vmin.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vminq_v2.i
+}
 
-;define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
-;  ret <4 x half> %vminnm_v2.i
-;}
+define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vminnm_f16:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vminnm_v2.i
+}
 
-;define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
-;  ret <8 x half> %vminnmq_v2.i
-;}
+define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vminnmq_f16:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vminnmq_v2.i
+}
 
 define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: test_vmul_f16:
Index: test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7a -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard | FileCheck %s
+
+; 4-element vector
+
+; Ordered
+
+define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test1:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ogt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test2:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ogt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test3:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast oge <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test4:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast oge <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test5:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast olt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test6:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast olt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test7:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ole <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test8:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ole <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+; Unordered
+
+define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test11:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ugt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test12:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ugt <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test13:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast uge <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test14:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast uge <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test15:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ult <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test16:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ult <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test17:
+; CHECK: vminnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ule <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+  ret <4 x half> %tmp4
+}
+
+define <4 x half> @test18(<4 x half> %A, <4 x half> %B) {
+; CHECK-LABEL: test18:
+; CHECK: vmaxnm.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ule <4 x half> %A, %B
+  %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+  ret <4 x half> %tmp4
+}
+
+; 8-element vector
+
+; Ordered
+
+define <8 x half> @test201(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test201:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ogt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test202(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test202:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ogt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test203(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test203:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast oge <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test204(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test204:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast oge <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test205(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test205:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast olt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test206(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test206:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast olt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test207(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test207:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ole <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test208(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test208:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ole <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+; Unordered
+
+define <8 x half> @test209(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test209:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ugt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test210(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test210:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ugt <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test211(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test211:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast uge <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test214(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test214:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast uge <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test215(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test215:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ult <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test216(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test216:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ult <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test217(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test217:
+; CHECK: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ule <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %A, <8 x half> %B
+  ret <8 x half> %tmp4
+}
+
+define <8 x half> @test218(<8 x half> %A, <8 x half> %B) {
+; CHECK-LABEL: test218:
+; CHECK: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+  %tmp3 = fcmp fast ule <8 x half> %A, %B
+  %tmp4 = select <8 x i1> %tmp3, <8 x half> %B, <8 x half> %A
+  ret <8 x half> %tmp4
+}
Index: test/CodeGen/ARM/vfcmp.ll
===================================================================
--- test/CodeGen/ARM/vfcmp.ll
+++ test/CodeGen/ARM/vfcmp.ll
@@ -7,33 +7,33 @@
 ;CHECK-LABEL: vcunef32:
 ;CHECK: vceq.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; olt is implemented with VCGT
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcoltf32:
 ;CHECK: vcgt.f32
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ole is implemented with VCGE
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcolef32:
 ;CHECK: vcge.f32
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; uge is implemented with VCGT/VMVN
@@ -41,11 +41,11 @@
 ;CHECK-LABEL: vcugef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ule is implemented with VCGT/VMVN
@@ -53,11 +53,11 @@
 ;CHECK-LABEL: vculef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ugt is implemented with VCGE/VMVN
@@ -65,11 +65,11 @@
 ;CHECK-LABEL: vcugtf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ult is implemented with VCGE/VMVN
@@ -77,11 +77,11 @@
 ;CHECK-LABEL: vcultf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ueq is implemented with VCGT/VCGT/VORR/VMVN
@@ -91,11 +91,11 @@
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; one is implemented with VCGT/VCGT/VORR
@@ -104,11 +104,11 @@
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; uno is implemented with VCGT/VCGE/VORR/VMVN
@@ -118,11 +118,11 @@
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }
 
 ; ord is implemented with VCGT/VCGE/VORR
@@ -131,9 +131,9 @@
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
-	%tmp1 = load <2 x float>, <2 x float>* %A
-	%tmp2 = load <2 x float>, <2 x float>* %B
-	%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
-	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
-	ret <2 x i32> %tmp4
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
 }