Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4884,7 +4884,7 @@ // the initial estimate is 2^-8. Thus the number of extra steps to refine // the result for float (23 mantissa bits) is 2 and for double (52 // mantissa bits) is 3. - ExtraSteps = VT == MVT::f64 ? 3 : 2; + ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2; return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand); } Index: llvm/test/CodeGen/AArch64/recp-fastmath.ll =================================================================== --- llvm/test/CodeGen/AArch64/recp-fastmath.ll +++ llvm/test/CodeGen/AArch64/recp-fastmath.ll @@ -18,6 +18,8 @@ ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frecpe [[R:s[0-7]]] ; CHECK-NEXT: frecps {{s[0-7](, s[0-7])?}}, [[R]] +; CHECK: frecps {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} +; CHECK-NOT: frecps {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} } define <2 x float> @f2recp0(<2 x float> %x) #0 { @@ -38,6 +40,8 @@ ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.2s]] ; CHECK-NEXT: frecps {{v[0-7]\.2s(, v[0-7].2s)?}}, [[R]] +; CHECK: frecps {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} +; CHECK-NOT: frecps {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} } define <4 x float> @f4recp0(<4 x float> %x) #0 { @@ -58,6 +62,8 @@ ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]] ; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[R]] +; CHECK: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} } define <8 x float> @f8recp0(<8 x float> %x) #0 { @@ -77,10 +83,12 @@ ; CHECK-LABEL: f8recp1: ; CHECK-NEXT: BB#0 -; CHECK-NEXT: frecpe [[RA:v[0-7]\.4s]] -; CHECK-NEXT: frecpe [[RB:v[0-7]\.4s]] -; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[RA]] -; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[RB]] +; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]] +; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[R]] +; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, {{v[0-7]\.4s}} +; CHECK: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} } define double @drecp0(double %x) #0 { @@ -101,6 +109,9 @@ ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frecpe [[R:d[0-7]]] ; CHECK-NEXT: frecps {{d[0-7](, d[0-7])?}}, [[R]] +; CHECK: frecps {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK: frecps {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK-NOT: frecps {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} } define <2 x double> @d2recp0(<2 x double> %x) #0 { @@ -121,6 +132,9 @@ ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]] ; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[R]] +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} } define <4 x double> @d4recp0(<4 x double> %x) #0 { @@ -140,10 +154,14 @@ ; CHECK-LABEL: d4recp1: ; CHECK-NEXT: BB#0 -; CHECK-NEXT: frecpe [[RA:v[0-7]\.2d]] -; CHECK-NEXT: frecpe [[RB:v[0-7]\.2d]] -; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[RA]] -; CHECK: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[RB]] +; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]] +; CHECK: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[R]] +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} } attributes #0 = { nounwind "unsafe-fp-math"="true" } Index: llvm/test/CodeGen/AArch64/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -22,7 +22,9 @@ ; CHECK-NEXT: frsqrte [[RA:s[0-7]]] ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] -; CHECK: fcmp s0, #0 +; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} +; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} +; CHECK: fcmp {{s[0-7]}}, #0 } define <2 x float> @f2sqrt(<2 x float> %a) #0 { @@ -38,6 +40,8 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} +; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} ; CHECK: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0 } @@ -54,6 +58,8 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} ; CHECK: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0 } @@ -69,8 +75,15 @@ ; CHECK-LABEL: f8sqrt: ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] -; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] -; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] +; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] +; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0 +; CHECK: frsqrte [[RC:v[0-7]\.4s]] +; CHECK-NEXT: fmul [[RD:v[0-7]\.4s]], [[RC]], [[RC]] +; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RD]] +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} ; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0 } @@ -87,7 +100,10 @@ ; CHECK-NEXT: frsqrte [[RA:d[0-7]]] ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] -; CHECK: fcmp d0, #0 +; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK: fcmp {{d[0-7]}}, #0 } define <2 x double> @d2sqrt(<2 x double> %a) #0 { @@ -103,6 +119,9 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} ; CHECK: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0 } @@ -118,8 +137,17 @@ ; CHECK-LABEL: d4sqrt: ; CHECK-NEXT: BB#0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] -; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] -; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] +; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] +; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrte [[RC:v[0-7]\.2d]] +; CHECK-NEXT: fmul [[RD:v[0-7]\.2d]], [[RC]], [[RC]] +; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RD]] +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} ; CHECK: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0 } @@ -137,6 +165,8 @@ ; CHECK-NEXT: frsqrte [[RA:s[0-7]]] ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] +; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} +; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} ; CHECK-NOT: fcmp {{s[0-7]}}, #0 } @@ -154,6 +184,8 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} +; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} ; CHECK-NOT: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0 } @@ -171,6 +203,8 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} ; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0 } @@ -189,6 +223,10 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} +; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} ; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0 } @@ -206,6 +244,9 @@ ; CHECK-NEXT: frsqrte [[RA:d[0-7]]] ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] +; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} +; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} ; CHECK-NOT: fcmp d0, #0 } @@ -223,6 +264,9 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} ; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0 } @@ -241,6 +285,12 @@ ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} +; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} ; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0 }