diff --git a/llvm/test/CodeGen/PowerPC/change-no-infs.ll b/llvm/test/CodeGen/PowerPC/change-no-infs.ll --- a/llvm/test/CodeGen/PowerPC/change-no-infs.ll +++ b/llvm/test/CodeGen/PowerPC/change-no-infs.ll @@ -3,11 +3,7 @@ ; next one. ; RUN: llc < %s -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 -mattr=-vsx \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=SAFE - -; RUN: llc < %s -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 -mattr=-vsx \ -; RUN: -enable-no-infs-fp-math -enable-no-nans-fp-math \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=UNSAFE +; RUN: | FileCheck %s --check-prefix=CHECK ; The fcmp+select in these functions should be converted to a fsel instruction ; when both NoInfsFPMath and NoNaNsInFPMath are enabled. @@ -15,53 +11,85 @@ ; CHECK-LABEL: default0: define double @default0(double %a, double %y, double %z) { entry: -; SAFE-NOT: fsel -; UNSAFE: fsel +; CHECK-NOT: fsel %cmp = fcmp ult double %a, 0.000000e+00 %z.y = select i1 %cmp, double %z, double %y ret double %z.y } +; CHECK-LABEL: default0_unsafe: +define double @default0_unsafe(double %a, double %y, double %z) { +entry: +; CHECK: fsel + %cmp = fcmp nnan ninf ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} + ; CHECK-LABEL: unsafe_math_off: -define double @unsafe_math_off(double %a, double %y, double %z) #0 #2 { +define double @unsafe_math_off(double %a, double %y, double %z) { entry: -; SAFE-NOT: fsel -; UNSAFE-NOT: fsel +; CHECK-NOT: fsel %cmp = fcmp ult double %a, 0.000000e+00 %z.y = select i1 %cmp, double %z, double %y ret double %z.y } +; CHECK-LABEL: unsafe_math_off_unsafe: +define double @unsafe_math_off_unsafe(double %a, double %y, double %z) { +entry: +; CHECK-NOT: fsel + %cmp = fcmp nnan ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} + ; CHECK-LABEL: default1: define double @default1(double %a, double %y, double %z) { -; SAFE-NOT: fsel -; UNSAFE: fsel +; CHECK-NOT: fsel %cmp = fcmp ult double %a, 0.000000e+00 %z.y = select i1 %cmp, double %z, double %y ret double %z.y } +; CHECK-LABEL: default1_unsafe: +define double @default1_unsafe(double %a, double %y, double %z) { +; CHECK: fsel + %cmp = fcmp nnan ninf ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} + ; CHECK-LABEL: unsafe_math_on: -define double @unsafe_math_on(double %a, double %y, double %z) #1 #3 { +define double @unsafe_math_on(double %a, double %y, double %z) { entry: -; SAFE-NOT: fsel -; UNSAFE-NOT: fsel +; CHECK-NOT: fsel %cmp = fcmp ult double %a, 0.000000e+00 %z.y = select i1 %cmp, double %z, double %y ret double %z.y } +; CHECK-LABEL: unsafe_math_on_unsafe: +define double @unsafe_math_on_unsafe(double %a, double %y, double %z) { +entry: +; CHECK-NOT: fsel + %cmp = fcmp ninf ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} + ; CHECK-LABEL: default2: define double @default2(double %a, double %y, double %z) { -; SAFE-NOT: fsel -; UNSAFE: fsel +; CHECK-NOT: fsel %cmp = fcmp ult double %a, 0.000000e+00 %z.y = select i1 %cmp, double %z, double %y ret double %z.y } -attributes #0 = { "no-infs-fp-math"="false" } -attributes #1 = { "no-nans-fp-math"="false" } - -attributes #2 = { "no-infs-fp-math"="false" } -attributes #3 = { "no-infs-fp-math"="true" } +; CHECK-LABEL: default2_unsafe: +define double @default2_unsafe(double %a, double %y, double %z) { +; CHECK: fsel + %cmp = fcmp nnan ninf ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -1,19 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -enable-no-signed-zeros-fp-math \ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CHECK-FAST %s -; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -enable-no-signed-zeros-fp-math \ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -enable-unsafe-fp-math -mattr=-vsx < %s | FileCheck -check-prefix=CHECK-FAST-NOVSX %s ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define double @fma_combine1(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine1: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 +; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine1: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 +; CHECK-FAST-NOVSX-NEXT: fmsub 1, 0, 2, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine1: @@ -29,15 +31,41 @@ ret double %add } +define double @fma_combine1_fast(double %a, double %b, double %c) { +; CHECK-FAST-LABEL: fma_combine1_fast: +; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_combine1_fast: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_combine1_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnegdp 0, 3 +; CHECK-NEXT: xsmuldp 0, 0, 2 +; CHECK-NEXT: xssubdp 1, 0, 1 +; CHECK-NEXT: blr +entry: + %fneg1 = fneg nsz double %c + %mul = fmul nsz double %fneg1, %b + %add = fsub nsz double %mul, %a + ret double %add +} + define double @fma_combine2(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine2: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnmaddadp 1, 2, 3 +; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsmsubadp 1, 2, 0 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine2: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 2, 3, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 +; CHECK-FAST-NOVSX-NEXT: fmsub 1, 2, 0, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine2: @@ -53,11 +81,81 @@ ret double %add } +define double @fma_combine2_fast(double %a, double %b, double %c) { +; CHECK-FAST-LABEL: fma_combine2_fast: +; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xsnmaddadp 1, 2, 3 +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_combine2_fast: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 2, 3, 1 +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_combine2_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnegdp 0, 3 +; CHECK-NEXT: xsmuldp 0, 2, 0 +; CHECK-NEXT: xssubdp 1, 0, 1 +; CHECK-NEXT: blr +entry: + %fneg1 = fneg nsz double %c + %mul = fmul nsz double %b, %fneg1 + %add = fsub nsz double %mul, %a + ret double %add +} + @v = common local_unnamed_addr global double 0.000000e+00, align 8 @z = common local_unnamed_addr global double 0.000000e+00, align 8 define double @fma_combine_two_uses(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine_two_uses: ; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha +; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha +; CHECK-FAST-NEXT: xsnegdp 3, 1 +; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 +; CHECK-FAST-NEXT: stfd 0, z@toc@l(4) +; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha +; CHECK-FAST-NOVSX-NEXT: addis 4, 2, z@toc@ha +; CHECK-FAST-NOVSX-NEXT: fmsub 0, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 +; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) +; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 +; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_combine_two_uses: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnegdp 3, 3 +; CHECK-NEXT: addis 3, 2, v@toc@ha +; CHECK-NEXT: addis 4, 2, z@toc@ha +; CHECK-NEXT: xsmuldp 0, 3, 2 +; CHECK-NEXT: stfd 3, z@toc@l(4) +; CHECK-NEXT: xsnegdp 2, 1 +; CHECK-NEXT: xssubdp 0, 0, 1 +; CHECK-NEXT: stfd 2, v@toc@l(3) +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: blr +entry: + %fneg = fneg double %a + store double %fneg, double* @v, align 8 + %fneg1 = fneg double %c + store double %fneg1, double* @z, align 8 + %mul = fmul double %fneg1, %b + %add = fsub double %mul, %a + ret double %add +} + +define double @fma_combine_two_uses_fast(double %a, double %b, double %c) { +; CHECK-FAST-LABEL: fma_combine_two_uses_fast: +; CHECK-FAST: # %bb.0: # %entry ; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha @@ -67,7 +165,7 @@ ; CHECK-FAST-NEXT: stfd 2, z@toc@l(4) ; CHECK-FAST-NEXT: blr ; -; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses: +; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses_fast: ; CHECK-FAST-NOVSX: # %bb.0: # %entry ; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 @@ -79,7 +177,53 @@ ; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) ; CHECK-FAST-NOVSX-NEXT: blr ; -; CHECK-LABEL: fma_combine_two_uses: +; CHECK-LABEL: fma_combine_two_uses_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsnegdp 3, 3 +; CHECK-NEXT: addis 3, 2, v@toc@ha +; CHECK-NEXT: addis 4, 2, z@toc@ha +; CHECK-NEXT: xsmuldp 0, 3, 2 +; CHECK-NEXT: stfd 3, z@toc@l(4) +; CHECK-NEXT: xsnegdp 2, 1 +; CHECK-NEXT: xssubdp 0, 0, 1 +; CHECK-NEXT: stfd 2, v@toc@l(3) +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: blr +entry: + %fneg = fneg nsz double %a + store double %fneg, double* @v, align 8 + %fneg1 = fneg nsz double %c + store double %fneg1, double* @z, align 8 + %mul = fmul nsz double %fneg1, %b + %add = fsub nsz double %mul, %a + ret double %add +} + +define double @fma_combine_one_use(double %a, double %b, double %c) { +; CHECK-FAST-LABEL: fma_combine_one_use: +; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha +; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha +; CHECK-FAST-NEXT: xsnegdp 3, 1 +; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 +; CHECK-FAST-NEXT: stfd 0, z@toc@l(4) +; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha +; CHECK-FAST-NOVSX-NEXT: addis 4, 2, z@toc@ha +; CHECK-FAST-NOVSX-NEXT: fmsub 0, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 +; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) +; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 +; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_combine_one_use: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsnegdp 3, 3 ; CHECK-NEXT: addis 3, 2, v@toc@ha @@ -101,8 +245,8 @@ ret double %add } -define double @fma_combine_one_use(double %a, double %b, double %c) { -; CHECK-FAST-LABEL: fma_combine_one_use: +define double @fma_combine_one_use_fast(double %a, double %b, double %c) { +; CHECK-FAST-LABEL: fma_combine_one_use_fast: ; CHECK-FAST: # %bb.0: # %entry ; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha @@ -110,7 +254,7 @@ ; CHECK-FAST-NEXT: stfd 0, v@toc@l(3) ; CHECK-FAST-NEXT: blr ; -; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use: +; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use_fast: ; CHECK-FAST-NOVSX: # %bb.0: # %entry ; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 @@ -119,7 +263,7 @@ ; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) ; CHECK-FAST-NOVSX-NEXT: blr ; -; CHECK-LABEL: fma_combine_one_use: +; CHECK-LABEL: fma_combine_one_use_fast: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsnegdp 0, 3 ; CHECK-NEXT: addis 3, 2, v@toc@ha @@ -130,24 +274,24 @@ ; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: blr entry: - %fneg = fneg double %a + %fneg = fneg nsz double %a store double %fneg, double* @v, align 8 - %fneg1 = fneg double %c - %mul = fmul double %fneg1, %b - %add = fsub double %mul, %a + %fneg1 = fneg nsz double %c + %mul = fmul nsz double %fneg1, %b + %add = fsub nsz double %mul, %a ret double %add } define float @fma_combine_no_ice() { ; CHECK-FAST-LABEL: fma_combine_no_ice: ; CHECK-FAST: # %bb.0: -; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; CHECK-FAST-NEXT: addis 4, 2, .LCPI4_1@toc@ha -; CHECK-FAST-NEXT: lfs 0, .LCPI4_0@toc@l(3) +; CHECK-FAST-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; CHECK-FAST-NEXT: addis 4, 2, .LCPI8_1@toc@ha +; CHECK-FAST-NEXT: lfs 0, .LCPI8_0@toc@l(3) ; CHECK-FAST-NEXT: lfsx 2, 0, 3 -; CHECK-FAST-NEXT: addis 3, 2, .LCPI4_2@toc@ha -; CHECK-FAST-NEXT: lfs 3, .LCPI4_1@toc@l(4) -; CHECK-FAST-NEXT: lfs 1, .LCPI4_2@toc@l(3) +; CHECK-FAST-NEXT: addis 3, 2, .LCPI8_2@toc@ha +; CHECK-FAST-NEXT: lfs 3, .LCPI8_1@toc@l(4) +; CHECK-FAST-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; CHECK-FAST-NEXT: xsmaddasp 3, 2, 0 ; CHECK-FAST-NEXT: xsmaddasp 1, 2, 3 ; CHECK-FAST-NEXT: xsnmsubasp 1, 3, 2 @@ -155,27 +299,27 @@ ; ; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice: ; CHECK-FAST-NOVSX: # %bb.0: -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI4_0@toc@l(3) -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_1@toc@ha +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI8_1@toc@ha ; CHECK-FAST-NOVSX-NEXT: lfs 1, 0(3) -; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_1@toc@l(3) -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI4_2@toc@ha +; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI8_1@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI8_2@toc@ha ; CHECK-FAST-NOVSX-NEXT: fmadds 0, 1, 2, 0 -; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI4_2@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI8_2@toc@l(3) ; CHECK-FAST-NOVSX-NEXT: fmadds 2, 1, 0, 2 ; CHECK-FAST-NOVSX-NEXT: fnmsubs 1, 0, 1, 2 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine_no_ice: ; CHECK: # %bb.0: -; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; CHECK-NEXT: addis 4, 2, .LCPI4_1@toc@ha -; CHECK-NEXT: lfs 0, .LCPI4_0@toc@l(3) +; CHECK-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; CHECK-NEXT: addis 4, 2, .LCPI8_1@toc@ha +; CHECK-NEXT: lfs 0, .LCPI8_0@toc@l(3) ; CHECK-NEXT: lfsx 2, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI4_2@toc@ha -; CHECK-NEXT: lfs 3, .LCPI4_1@toc@l(4) -; CHECK-NEXT: lfs 1, .LCPI4_2@toc@l(3) +; CHECK-NEXT: addis 3, 2, .LCPI8_2@toc@ha +; CHECK-NEXT: lfs 3, .LCPI8_1@toc@l(4) +; CHECK-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; CHECK-NEXT: xsmaddasp 3, 2, 0 ; CHECK-NEXT: xsmaddasp 1, 2, 3 ; CHECK-NEXT: xsnmsubasp 1, 3, 2 @@ -195,14 +339,71 @@ ret float %tmp12 } +define float @fma_combine_no_ice_fast() { +; CHECK-FAST-LABEL: fma_combine_no_ice_fast: +; CHECK-FAST: # %bb.0: +; CHECK-FAST-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; CHECK-FAST-NEXT: addis 4, 2, .LCPI9_1@toc@ha +; CHECK-FAST-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; CHECK-FAST-NEXT: lfsx 2, 0, 3 +; CHECK-FAST-NEXT: addis 3, 2, .LCPI9_2@toc@ha +; CHECK-FAST-NEXT: lfs 3, .LCPI9_1@toc@l(4) +; CHECK-FAST-NEXT: lfs 1, .LCPI9_2@toc@l(3) +; CHECK-FAST-NEXT: xsmaddasp 3, 2, 0 +; CHECK-FAST-NEXT: xsmaddasp 1, 2, 3 +; CHECK-FAST-NEXT: xsnmsubasp 1, 3, 2 +; CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice_fast: +; CHECK-FAST-NOVSX: # %bb.0: +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI9_1@toc@ha +; CHECK-FAST-NOVSX-NEXT: lfs 1, 0(3) +; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI9_1@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI9_2@toc@ha +; CHECK-FAST-NOVSX-NEXT: fmadds 0, 1, 2, 0 +; CHECK-FAST-NOVSX-NEXT: lfs 2, .LCPI9_2@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: fmadds 2, 1, 0, 2 +; CHECK-FAST-NOVSX-NEXT: fnmsubs 1, 0, 1, 2 +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_combine_no_ice_fast: +; CHECK: # %bb.0: +; CHECK-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; CHECK-NEXT: addis 4, 2, .LCPI9_1@toc@ha +; CHECK-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; CHECK-NEXT: lfsx 2, 0, 3 +; CHECK-NEXT: addis 3, 2, .LCPI9_2@toc@ha +; CHECK-NEXT: lfs 3, .LCPI9_1@toc@l(4) +; CHECK-NEXT: lfs 1, .LCPI9_2@toc@l(3) +; CHECK-NEXT: xsmaddasp 3, 2, 0 +; CHECK-NEXT: xsmaddasp 1, 2, 3 +; CHECK-NEXT: xsnmsubasp 1, 3, 2 +; CHECK-NEXT: blr + %tmp = load float, float* undef, align 4 + %tmp2 = load float, float* undef, align 4 + %tmp3 = fmul reassoc float %tmp, 0x3FE372D780000000 + %tmp4 = fadd reassoc nsz float %tmp3, 1.000000e+00 + %tmp5 = fmul reassoc float %tmp2, %tmp4 + %tmp6 = load float, float* undef, align 4 + %tmp7 = load float, float* undef, align 4 + %tmp8 = fmul reassoc float %tmp7, 0x3FE372D780000000 + %tmp9 = fsub reassoc nsz float -1.000000e+00, %tmp8 + %tmp10 = fmul reassoc float %tmp9, %tmp6 + %tmp11 = fadd reassoc nsz float %tmp5, 5.000000e-01 + %tmp12 = fadd reassoc nsz float %tmp11, %tmp10 + ret float %tmp12 +} + ; This would crash while trying getNegatedExpression(). define double @getNegatedExpression_crash(double %x, double %y) { ; CHECK-FAST-LABEL: getNegatedExpression_crash: ; CHECK-FAST: # %bb.0: -; CHECK-FAST-NEXT: addis 3, 2, .LCPI5_1@toc@ha -; CHECK-FAST-NEXT: addis 4, 2, .LCPI5_0@toc@ha -; CHECK-FAST-NEXT: lfs 3, .LCPI5_1@toc@l(3) -; CHECK-FAST-NEXT: lfs 4, .LCPI5_0@toc@l(4) +; CHECK-FAST-NEXT: addis 3, 2, .LCPI10_1@toc@ha +; CHECK-FAST-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; CHECK-FAST-NEXT: lfs 3, .LCPI10_1@toc@l(3) +; CHECK-FAST-NEXT: lfs 4, .LCPI10_0@toc@l(4) ; CHECK-FAST-NEXT: xssubdp 0, 1, 3 ; CHECK-FAST-NEXT: xsmaddadp 3, 1, 4 ; CHECK-FAST-NEXT: xsmaddadp 0, 3, 2 @@ -211,10 +412,10 @@ ; ; CHECK-FAST-NOVSX-LABEL: getNegatedExpression_crash: ; CHECK-FAST-NOVSX: # %bb.0: -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; CHECK-FAST-NOVSX-NEXT: addis 4, 2, .LCPI5_1@toc@ha -; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI5_0@toc@l(3) -; CHECK-FAST-NOVSX-NEXT: lfs 3, .LCPI5_1@toc@l(4) +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; CHECK-FAST-NOVSX-NEXT: addis 4, 2, .LCPI10_1@toc@ha +; CHECK-FAST-NOVSX-NEXT: lfs 0, .LCPI10_0@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: lfs 3, .LCPI10_1@toc@l(4) ; CHECK-FAST-NOVSX-NEXT: fmadd 3, 1, 3, 0 ; CHECK-FAST-NOVSX-NEXT: fsub 0, 1, 0 ; CHECK-FAST-NOVSX-NEXT: fmadd 1, 3, 2, 0 @@ -222,10 +423,10 @@ ; ; CHECK-LABEL: getNegatedExpression_crash: ; CHECK: # %bb.0: -; CHECK-NEXT: addis 3, 2, .LCPI5_1@toc@ha -; CHECK-NEXT: addis 4, 2, .LCPI5_0@toc@ha -; CHECK-NEXT: lfs 3, .LCPI5_1@toc@l(3) -; CHECK-NEXT: lfs 4, .LCPI5_0@toc@l(4) +; CHECK-NEXT: addis 3, 2, .LCPI10_1@toc@ha +; CHECK-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; CHECK-NEXT: lfs 3, .LCPI10_1@toc@l(3) +; CHECK-NEXT: lfs 4, .LCPI10_0@toc@l(4) ; CHECK-NEXT: xssubdp 0, 1, 3 ; CHECK-NEXT: xsmaddadp 3, 1, 4 ; CHECK-NEXT: xsmaddadp 0, 3, 2 diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -2,8 +2,8 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF -; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG -; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL +; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG +; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math | FileCheck %s --check-prefix=GLOBAL ; Test FP transforms using instruction/node-level fast-math-flags. ; We're also checking debug output to verify that FMF is propagated to the newly created nodes. diff --git a/llvm/test/CodeGen/PowerPC/fsel.ll b/llvm/test/CodeGen/PowerPC/fsel.ll --- a/llvm/test/CodeGen/PowerPC/fsel.ll +++ b/llvm/test/CodeGen/PowerPC/fsel.ll @@ -1,6 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=-vsx | FileCheck -check-prefix=CHECK-FM %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=+vsx | FileCheck -check-prefix=CHECK-FM-VSX %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -14,13 +13,24 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @zerocmp1 -; CHECK-FM: fsel 1, 1, 2, 3 -; CHECK-FM: blr +; CHECK-VSX: @zerocmp1 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @zerocmp1_fast(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y + +; CHECK: @zerocmp1_fast +; CHECK: fsel 1, 1, 2, 3 +; CHECK: blr -; CHECK-FM-VSX: @zerocmp1 -; CHECK-FM-VSX: fsel 1, 1, 2, 3 -; CHECK-FM-VSX: blr +; CHECK-VSX: @zerocmp1_fast +; CHECK-VSX: fsel 1, 1, 2, 3 +; CHECK-VSX: blr } define double @zerocmp2(double %a, double %y, double %z) #0 { @@ -33,15 +43,26 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @zerocmp2 -; CHECK-FM: fneg [[REG:[0-9]+]], 1 -; CHECK-FM: fsel 1, [[REG]], 3, 2 -; CHECK-FM: blr +; CHECK-VSX: @zerocmp2 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @zerocmp2_fast(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf ogt double %a, 0.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @zerocmp2_fast +; CHECK: fneg [[REG:[0-9]+]], 1 +; CHECK: fsel 1, [[REG]], 3, 2 +; CHECK: blr -; CHECK-FM-VSX: @zerocmp2 -; CHECK-FM-VSX: xsnegdp [[REG:[0-9]+]], 1 -; CHECK-FM-VSX: fsel 1, [[REG]], 3, 2 -; CHECK-FM-VSX: blr +; CHECK-VSX: @zerocmp2_fast +; CHECK-VSX: xsnegdp [[REG:[0-9]+]], 1 +; CHECK-VSX: fsel 1, [[REG]], 3, 2 +; CHECK-VSX: blr } define double @zerocmp3(double %a, double %y, double %z) #0 { @@ -54,17 +75,28 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @zerocmp3 -; CHECK-FM: fsel [[REG:[0-9]+]], 1, 2, 3 -; CHECK-FM: fneg [[REG2:[0-9]+]], 1 -; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3 -; CHECK-FM: blr +; CHECK-VSX: @zerocmp3 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @zerocmp3_fast(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf oeq double %a, 0.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @zerocmp3_fast +; CHECK: fsel [[REG:[0-9]+]], 1, 2, 3 +; CHECK: fneg [[REG2:[0-9]+]], 1 +; CHECK: fsel 1, [[REG2]], [[REG]], 3 +; CHECK: blr -; CHECK-FM-VSX: @zerocmp3 -; CHECK-FM-VSX: xsnegdp [[REG2:[0-9]+]], 1 -; CHECK-FM-VSX: fsel [[REG:[0-9]+]], 1, 2, 3 -; CHECK-FM-VSX: fsel 1, [[REG2]], [[REG]], 3 -; CHECK-FM-VSX: blr +; CHECK-VSX: @zerocmp3_fast +; CHECK-VSX: xsnegdp [[REG2:[0-9]+]], 1 +; CHECK-VSX: fsel [[REG:[0-9]+]], 1, 2, 3 +; CHECK-VSX: fsel 1, [[REG2]], [[REG]], 3 +; CHECK-VSX: blr } define double @min1(double %a, double %b) #0 { @@ -77,15 +109,26 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @min1 -; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 -; CHECK-FM: fsel 1, [[REG]], 1, 2 -; CHECK-FM: blr +; CHECK-VSX: @min1 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @min1_fast(double %a, double %b) #0 { +entry: + %cmp = fcmp nnan ninf ole double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond + +; CHECK: @min1_fast +; CHECK: fsub [[REG:[0-9]+]], 2, 1 +; CHECK: fsel 1, [[REG]], 1, 2 +; CHECK: blr -; CHECK-FM-VSX: @min1 -; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1 -; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2 -; CHECK-FM-VSX: blr +; CHECK-VSX: @min1_fast +; CHECK-VSX: xssubdp [[REG:[0-9]+]], 2, 1 +; CHECK-VSX: fsel 1, [[REG]], 1, 2 +; CHECK-VSX: blr } define double @max1(double %a, double %b) #0 { @@ -98,15 +141,26 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @max1 -; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 -; CHECK-FM: fsel 1, [[REG]], 1, 2 -; CHECK-FM: blr +; CHECK-VSX: @max1 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @max1_fast(double %a, double %b) #0 { +entry: + %cmp = fcmp nnan ninf oge double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond + +; CHECK: @max1_fast +; CHECK: fsub [[REG:[0-9]+]], 1, 2 +; CHECK: fsel 1, [[REG]], 1, 2 +; CHECK: blr -; CHECK-FM-VSX: @max1 -; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 -; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2 -; CHECK-FM-VSX: blr +; CHECK-VSX: @max1_fast +; CHECK-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-VSX: fsel 1, [[REG]], 1, 2 +; CHECK-VSX: blr } define double @cmp1(double %a, double %b, double %y, double %z) #0 { @@ -119,15 +173,26 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @cmp1 -; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 -; CHECK-FM: fsel 1, [[REG]], 3, 4 -; CHECK-FM: blr +; CHECK-VSX: @cmp1 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @cmp1_fast(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf ult double %a, %b + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y + +; CHECK: @cmp1_fast +; CHECK: fsub [[REG:[0-9]+]], 1, 2 +; CHECK: fsel 1, [[REG]], 3, 4 +; CHECK: blr -; CHECK-FM-VSX: @cmp1 -; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 -; CHECK-FM-VSX: fsel 1, [[REG]], 3, 4 -; CHECK-FM-VSX: blr +; CHECK-VSX: @cmp1_fast +; CHECK-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-VSX: fsel 1, [[REG]], 3, 4 +; CHECK-VSX: blr } define double @cmp2(double %a, double %b, double %y, double %z) #0 { @@ -140,15 +205,26 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @cmp2 -; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 -; CHECK-FM: fsel 1, [[REG]], 4, 3 -; CHECK-FM: blr +; CHECK-VSX: @cmp2 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @cmp2_fast(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf ogt double %a, %b + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @cmp2_fast +; CHECK: fsub [[REG:[0-9]+]], 2, 1 +; CHECK: fsel 1, [[REG]], 4, 3 +; CHECK: blr -; CHECK-FM-VSX: @cmp2 -; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1 -; CHECK-FM-VSX: fsel 1, [[REG]], 4, 3 -; CHECK-FM-VSX: blr +; CHECK-VSX: @cmp2_fast +; CHECK-VSX: xssubdp [[REG:[0-9]+]], 2, 1 +; CHECK-VSX: fsel 1, [[REG]], 4, 3 +; CHECK-VSX: blr } define double @cmp3(double %a, double %b, double %y, double %z) #0 { @@ -161,19 +237,30 @@ ; CHECK-NOT: fsel ; CHECK: blr -; CHECK-FM: @cmp3 -; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 -; CHECK-FM: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 -; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]] -; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4 -; CHECK-FM: blr +; CHECK-VSX: @cmp3 +; CHECK-VSX-NOT: fsel +; CHECK-VSX: blr +} + +define double @cmp3_fast(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp nnan ninf oeq double %a, %b + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @cmp3_fast +; CHECK: fsub [[REG:[0-9]+]], 1, 2 +; CHECK: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 +; CHECK: fneg [[REG3:[0-9]+]], [[REG]] +; CHECK: fsel 1, [[REG3]], [[REG2]], 4 +; CHECK: blr -; CHECK-FM-VSX: @cmp3 -; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 -; CHECK-FM-VSX: xsnegdp [[REG3:[0-9]+]], [[REG]] -; CHECK-FM-VSX: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 -; CHECK-FM-VSX: fsel 1, [[REG3]], [[REG2]], 4 -; CHECK-FM-VSX: blr +; CHECK-VSX: @cmp3_fast +; CHECK-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-VSX: xsnegdp [[REG3:[0-9]+]], [[REG]] +; CHECK-VSX: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 +; CHECK-VSX: fsel 1, [[REG3]], [[REG2]], 4 +; CHECK-VSX: blr } attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/PowerPC/scalar-equal.ll b/llvm/test/CodeGen/PowerPC/scalar-equal.ll --- a/llvm/test/CodeGen/PowerPC/scalar-equal.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-equal.ll @@ -1,12 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ +; RUN: -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ +; RUN: -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P9 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -verify-machineinstrs \ @@ -19,18 +17,21 @@ define double @testoeq(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: testoeq: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f1, f2 -; FAST-P8-NEXT: xsnegdp f1, f0 -; FAST-P8-NEXT: fsel f0, f0, f3, f4 -; FAST-P8-NEXT: fsel f1, f1, f0, f4 +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: beqlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: testoeq: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 -; FAST-P9-NEXT: xsnegdp f0, f0 -; FAST-P9-NEXT: fsel f1, f0, f1, f4 +; FAST-P9-NEXT: xscmpudp cr0, f1, f2 +; FAST-P9-NEXT: beq cr0, .LBB0_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB0_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testoeq: diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll --- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll @@ -1,12 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math \ -; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=FAST-P8 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math \ -; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=FAST-P9 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=NO-FAST-P9 @@ -14,10 +12,18 @@ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=NO-FAST-P8 define dso_local float @testfmax(float %a, float %b) local_unnamed_addr { -; CHECK-LABEL: testfmax: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmaxdp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testfmax: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: bgtlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testfmax: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testfmax: ; NO-FAST-P9: # %bb.0: # %entry @@ -38,10 +44,18 @@ } define dso_local double @testdmax(double %a, double %b) local_unnamed_addr { -; CHECK-LABEL: testdmax: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmaxdp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testdmax: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: bgtlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testdmax: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testdmax: ; NO-FAST-P9: # %bb.0: # %entry @@ -62,10 +76,18 @@ } define dso_local float @testfmin(float %a, float %b) local_unnamed_addr { -; CHECK-LABEL: testfmin: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmindp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testfmin: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: bltlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testfmin: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmincdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testfmin: ; NO-FAST-P9: # %bb.0: # %entry @@ -86,10 +108,18 @@ } define dso_local double @testdmin(double %a, double %b) local_unnamed_addr { -; CHECK-LABEL: testdmin: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmindp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testdmin: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: bltlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testdmin: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmincdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testdmin: ; NO-FAST-P9: # %bb.0: # %entry @@ -110,10 +140,16 @@ } define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr { -; CHECK-LABEL: testfmax_fast: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmaxdp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testfmax_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubsp f0, f2, f1 +; FAST-P8-NEXT: fsel f1, f0, f2, f1 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testfmax_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testfmax_fast: ; NO-FAST-P9: # %bb.0: # %entry @@ -131,10 +167,16 @@ ret float %cond } define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr { -; CHECK-LABEL: testdmax_fast: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmaxdp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testdmax_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubdp f0, f2, f1 +; FAST-P8-NEXT: fsel f1, f0, f2, f1 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testdmax_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testdmax_fast: ; NO-FAST-P9: # %bb.0: # %entry @@ -152,10 +194,16 @@ ret double %cond } define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr { -; CHECK-LABEL: testfmin_fast: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmindp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testfmin_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubsp f0, f1, f2 +; FAST-P8-NEXT: fsel f1, f0, f2, f1 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testfmin_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmincdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testfmin_fast: ; NO-FAST-P9: # %bb.0: # %entry @@ -173,10 +221,16 @@ ret float %cond } define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr { -; CHECK-LABEL: testdmin_fast: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmindp f1, f1, f2 -; CHECK-NEXT: blr +; FAST-P8-LABEL: testdmin_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubdp f0, f1, f2 +; FAST-P8-NEXT: fsel f1, f0, f2, f1 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: testdmin_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xsmincdp f1, f1, f2 +; FAST-P9-NEXT: blr ; ; NO-FAST-P9-LABEL: testdmin_fast: ; NO-FAST-P9: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll --- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll @@ -1,12 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ +; RUN: -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P8 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \ -; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \ -; RUN: --enable-no-nans-fp-math --enable-no-infs-fp-math \ +; RUN: -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=FAST-P9 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -verify-machineinstrs \ @@ -20,18 +18,21 @@ define float @select_oeq_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_oeq_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f1, f2 -; FAST-P8-NEXT: xssubsp f1, f2, f1 -; FAST-P8-NEXT: fsel f0, f0, f3, f4 -; FAST-P8-NEXT: fsel f1, f1, f0, f4 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: beqlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_oeq_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f2, f1 -; FAST-P9-NEXT: xssubsp f1, f1, f2 -; FAST-P9-NEXT: fsel f1, f1, f3, f4 -; FAST-P9-NEXT: fsel f1, f0, f1, f4 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: beq cr0, .LBB0_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB0_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_oeq_float: @@ -61,18 +62,21 @@ define double @select_oeq_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_oeq_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f1, f2 -; FAST-P8-NEXT: xsnegdp f1, f0 -; FAST-P8-NEXT: fsel f0, f0, f3, f4 -; FAST-P8-NEXT: fsel f1, f1, f0, f4 +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: beqlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_oeq_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 -; FAST-P9-NEXT: xsnegdp f0, f0 -; FAST-P9-NEXT: fsel f1, f0, f1, f4 +; FAST-P9-NEXT: xscmpudp cr0, f1, f2 +; FAST-P9-NEXT: beq cr0, .LBB1_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB1_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_oeq_double: @@ -180,18 +184,23 @@ define float @select_one_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_one_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f1, f2 -; FAST-P8-NEXT: xssubsp f1, f2, f1 -; FAST-P8-NEXT: fsel f0, f0, f4, f3 -; FAST-P8-NEXT: fsel f1, f1, f0, f3 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, eq +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_one_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f2, f1 -; FAST-P9-NEXT: xssubsp f1, f1, f2 -; FAST-P9-NEXT: fsel f1, f1, f4, f3 -; FAST-P9-NEXT: fsel f1, f0, f1, f3 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, eq +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB4_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB4_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_one_float: @@ -223,18 +232,23 @@ define double @select_one_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_one_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f1, f2 -; FAST-P8-NEXT: xsnegdp f1, f0 -; FAST-P8-NEXT: fsel f0, f0, f4, f3 -; FAST-P8-NEXT: fsel f1, f1, f0, f3 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, eq +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_one_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f4, f3 -; FAST-P9-NEXT: xsnegdp f0, f0 -; FAST-P9-NEXT: fsel f1, f0, f1, f3 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, eq +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB5_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB5_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_one_double: @@ -344,14 +358,23 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_oge_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f1, f2 -; FAST-P8-NEXT: fsel f1, f0, f3, f4 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, lt +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_oge_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, lt +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB8_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB8_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_oge_float: @@ -383,14 +406,23 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_oge_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f1, f2 -; FAST-P8-NEXT: fsel f1, f0, f3, f4 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, lt +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_oge_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, lt +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB9_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB9_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_oge_double: @@ -484,14 +516,21 @@ define float @select_olt_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_olt_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f1, f2 -; FAST-P8-NEXT: fsel f1, f0, f4, f3 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: bltlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_olt_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f4, f3 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: blt cr0, .LBB12_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB12_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_olt_float: @@ -521,14 +560,21 @@ define double @select_olt_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_olt_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f1, f2 -; FAST-P8-NEXT: fsel f1, f0, f4, f3 +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: bltlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_olt_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f1, f2 -; FAST-P9-NEXT: fsel f1, f0, f4, f3 +; FAST-P9-NEXT: xscmpudp cr0, f1, f2 +; FAST-P9-NEXT: blt cr0, .LBB13_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB13_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_olt_double: @@ -620,14 +666,21 @@ define float @select_ogt_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_ogt_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f2, f1 -; FAST-P8-NEXT: fsel f1, f0, f4, f3 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: bgtlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_ogt_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f2, f1 -; FAST-P9-NEXT: fsel f1, f0, f4, f3 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: bgt cr0, .LBB16_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB16_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_ogt_float: @@ -657,14 +710,21 @@ define double @select_ogt_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_ogt_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f2, f1 -; FAST-P8-NEXT: fsel f1, f0, f4, f3 +; FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: bgtlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_ogt_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f2, f1 -; FAST-P9-NEXT: fsel f1, f0, f4, f3 +; FAST-P9-NEXT: xscmpudp cr0, f1, f2 +; FAST-P9-NEXT: bgt cr0, .LBB17_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB17_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_ogt_double: @@ -756,14 +816,23 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_ole_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f2, f1 -; FAST-P8-NEXT: fsel f1, f0, f3, f4 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, gt +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_ole_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f2, f1 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, gt +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB20_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB20_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_ole_float: @@ -795,14 +864,23 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_ole_double: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubdp f0, f2, f1 -; FAST-P8-NEXT: fsel f1, f0, f3, f4 +; FAST-P8-NEXT: fcmpu cr0, f1, f2 +; FAST-P8-NEXT: fmr f1, f3 +; FAST-P8-NEXT: crnor 4*cr5+lt, un, gt +; FAST-P8-NEXT: bclr 12, 4*cr5+lt, 0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_ole_double: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubdp f0, f2, f1 -; FAST-P9-NEXT: fsel f1, f0, f3, f4 +; FAST-P9-NEXT: fcmpu cr0, f1, f2 +; FAST-P9-NEXT: crnor 4*cr5+lt, un, gt +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB21_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f4 +; FAST-P9-NEXT: .LBB21_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_ole_double: @@ -897,16 +975,26 @@ ; FAST-P8: # %bb.0: # %entry ; FAST-P8-NEXT: addis r3, r2, .LCPI24_0@toc@ha ; FAST-P8-NEXT: lfs f0, .LCPI24_0@toc@l(r3) -; FAST-P8-NEXT: xssubdp f0, f1, f0 -; FAST-P8-NEXT: fsel f1, f0, f2, f3 +; FAST-P8-NEXT: fcmpu cr0, f1, f0 +; FAST-P8-NEXT: cror 4*cr5+lt, lt, un +; FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB24_2 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f3, f2 +; FAST-P8-NEXT: .LBB24_2: # %entry +; FAST-P8-NEXT: fmr f1, f3 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: onecmp1: ; FAST-P9: # %bb.0: # %entry ; FAST-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha ; FAST-P9-NEXT: lfs f0, .LCPI24_0@toc@l(r3) -; FAST-P9-NEXT: xssubdp f0, f1, f0 -; FAST-P9-NEXT: fsel f1, f0, f2, f3 +; FAST-P9-NEXT: fcmpu cr0, f1, f0 +; FAST-P9-NEXT: cror 4*cr5+lt, lt, un +; FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB24_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f3, f2 +; FAST-P9-NEXT: .LBB24_2: # %entry +; FAST-P9-NEXT: fmr f1, f3 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: onecmp1: @@ -945,16 +1033,23 @@ ; FAST-P8: # %bb.0: # %entry ; FAST-P8-NEXT: addis r3, r2, .LCPI25_0@toc@ha ; FAST-P8-NEXT: lfs f0, .LCPI25_0@toc@l(r3) -; FAST-P8-NEXT: xssubdp f0, f0, f1 -; FAST-P8-NEXT: fsel f1, f0, f3, f2 +; FAST-P8-NEXT: xscmpudp cr0, f1, f0 +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: bgtlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f3 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: onecmp2: ; FAST-P9: # %bb.0: # %entry ; FAST-P9-NEXT: addis r3, r2, .LCPI25_0@toc@ha ; FAST-P9-NEXT: lfs f0, .LCPI25_0@toc@l(r3) -; FAST-P9-NEXT: xssubdp f0, f0, f1 -; FAST-P9-NEXT: fsel f1, f0, f3, f2 +; FAST-P9-NEXT: xscmpudp cr0, f1, f0 +; FAST-P9-NEXT: bgt cr0, .LBB25_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f2, f3 +; FAST-P9-NEXT: .LBB25_2: # %entry +; FAST-P9-NEXT: fmr f1, f2 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: onecmp2: @@ -990,20 +1085,23 @@ ; FAST-P8: # %bb.0: # %entry ; FAST-P8-NEXT: addis r3, r2, .LCPI26_0@toc@ha ; FAST-P8-NEXT: lfs f0, .LCPI26_0@toc@l(r3) -; FAST-P8-NEXT: xssubdp f0, f1, f0 -; FAST-P8-NEXT: xsnegdp f1, f0 -; FAST-P8-NEXT: fsel f0, f0, f2, f3 -; FAST-P8-NEXT: fsel f1, f1, f0, f3 +; FAST-P8-NEXT: xscmpudp cr0, f1, f0 +; FAST-P8-NEXT: fmr f1, f2 +; FAST-P8-NEXT: beqlr cr0 +; FAST-P8-NEXT: # %bb.1: # %entry +; FAST-P8-NEXT: fmr f1, f3 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: onecmp3: ; FAST-P9: # %bb.0: # %entry ; FAST-P9-NEXT: addis r3, r2, .LCPI26_0@toc@ha ; FAST-P9-NEXT: lfs f0, .LCPI26_0@toc@l(r3) -; FAST-P9-NEXT: xssubdp f0, f1, f0 -; FAST-P9-NEXT: fsel f1, f0, f2, f3 -; FAST-P9-NEXT: xsnegdp f0, f0 -; FAST-P9-NEXT: fsel f1, f0, f1, f3 +; FAST-P9-NEXT: xscmpudp cr0, f1, f0 +; FAST-P9-NEXT: beq cr0, .LBB26_2 +; FAST-P9-NEXT: # %bb.1: # %entry +; FAST-P9-NEXT: fmr f2, f3 +; FAST-P9-NEXT: .LBB26_2: # %entry +; FAST-P9-NEXT: fmr f1, f2 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: onecmp3: @@ -1033,3 +1131,125 @@ %y.z = select i1 %cmp, double %y, double %z ret double %y.z } + +define double @onecmp1_fast(double %a, double %y, double %z) { +; FAST-P8-LABEL: onecmp1_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: addis r3, r2, .LCPI27_0@toc@ha +; FAST-P8-NEXT: lfs f0, .LCPI27_0@toc@l(r3) +; FAST-P8-NEXT: xssubdp f0, f1, f0 +; FAST-P8-NEXT: fsel f1, f0, f2, f3 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: onecmp1_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: addis r3, r2, .LCPI27_0@toc@ha +; FAST-P9-NEXT: lfs f0, .LCPI27_0@toc@l(r3) +; FAST-P9-NEXT: xssubdp f0, f1, f0 +; FAST-P9-NEXT: fsel f1, f0, f2, f3 +; FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: onecmp1_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: addis r3, r2, .LCPI27_0@toc@ha +; NO-FAST-P8-NEXT: lfs f0, .LCPI27_0@toc@l(r3) +; NO-FAST-P8-NEXT: xssubdp f0, f1, f0 +; NO-FAST-P8-NEXT: fsel f1, f0, f2, f3 +; NO-FAST-P8-NEXT: blr +; +; NO-FAST-P9-LABEL: onecmp1_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: addis r3, r2, .LCPI27_0@toc@ha +; NO-FAST-P9-NEXT: lfs f0, .LCPI27_0@toc@l(r3) +; NO-FAST-P9-NEXT: xssubdp f0, f1, f0 +; NO-FAST-P9-NEXT: fsel f1, f0, f2, f3 +; NO-FAST-P9-NEXT: blr +entry: + %cmp = fcmp nnan ninf nsz ult double %a, 1.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y +} + +define double @onecmp2_fast(double %a, double %y, double %z) { +; FAST-P8-LABEL: onecmp2_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: addis r3, r2, .LCPI28_0@toc@ha +; FAST-P8-NEXT: lfs f0, .LCPI28_0@toc@l(r3) +; FAST-P8-NEXT: xssubdp f0, f0, f1 +; FAST-P8-NEXT: fsel f1, f0, f3, f2 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: onecmp2_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: addis r3, r2, .LCPI28_0@toc@ha +; FAST-P9-NEXT: lfs f0, .LCPI28_0@toc@l(r3) +; FAST-P9-NEXT: xssubdp f0, f0, f1 +; FAST-P9-NEXT: fsel f1, f0, f3, f2 +; FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: onecmp2_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: addis r3, r2, .LCPI28_0@toc@ha +; NO-FAST-P8-NEXT: lfs f0, .LCPI28_0@toc@l(r3) +; NO-FAST-P8-NEXT: xssubdp f0, f0, f1 +; NO-FAST-P8-NEXT: fsel f1, f0, f3, f2 +; NO-FAST-P8-NEXT: blr +; +; NO-FAST-P9-LABEL: onecmp2_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: addis r3, r2, .LCPI28_0@toc@ha +; NO-FAST-P9-NEXT: lfs f0, .LCPI28_0@toc@l(r3) +; NO-FAST-P9-NEXT: xssubdp f0, f0, f1 +; NO-FAST-P9-NEXT: fsel f1, f0, f3, f2 +; NO-FAST-P9-NEXT: blr +entry: + %cmp = fcmp nnan ninf nsz ogt double %a, 1.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z +} + +define double @onecmp3_fast(double %a, double %y, double %z) { +; FAST-P8-LABEL: onecmp3_fast: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: addis r3, r2, .LCPI29_0@toc@ha +; FAST-P8-NEXT: lfs f0, .LCPI29_0@toc@l(r3) +; FAST-P8-NEXT: xssubdp f0, f1, f0 +; FAST-P8-NEXT: xsnegdp f1, f0 +; FAST-P8-NEXT: fsel f0, f0, f2, f3 +; FAST-P8-NEXT: fsel f1, f1, f0, f3 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: onecmp3_fast: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: addis r3, r2, .LCPI29_0@toc@ha +; FAST-P9-NEXT: lfs f0, .LCPI29_0@toc@l(r3) +; FAST-P9-NEXT: xssubdp f0, f1, f0 +; FAST-P9-NEXT: fsel f1, f0, f2, f3 +; FAST-P9-NEXT: xsnegdp f0, f0 +; FAST-P9-NEXT: fsel f1, f0, f1, f3 +; FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: onecmp3_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: addis r3, r2, .LCPI29_0@toc@ha +; NO-FAST-P8-NEXT: lfs f0, .LCPI29_0@toc@l(r3) +; NO-FAST-P8-NEXT: xssubdp f0, f1, f0 +; NO-FAST-P8-NEXT: xsnegdp f1, f0 +; NO-FAST-P8-NEXT: fsel f0, f0, f2, f3 +; NO-FAST-P8-NEXT: fsel f1, f1, f0, f3 +; NO-FAST-P8-NEXT: blr +; +; NO-FAST-P9-LABEL: onecmp3_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: addis r3, r2, .LCPI29_0@toc@ha +; NO-FAST-P9-NEXT: lfs f0, .LCPI29_0@toc@l(r3) +; NO-FAST-P9-NEXT: xssubdp f0, f1, f0 +; NO-FAST-P9-NEXT: fsel f1, f0, f2, f3 +; NO-FAST-P9-NEXT: xsnegdp f0, f0 +; NO-FAST-P9-NEXT: fsel f1, f0, f1, f3 +; NO-FAST-P9-NEXT: blr +entry: + %cmp = fcmp nnan ninf nsz oeq double %a, 1.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z +}