@@ -530,3 +530,173 @@ define float @fact_div6(float %x) {
530
530
; CHECK: fact_div6
531
531
; CHECK: %t3 = fsub fast float %t1, %t2
532
532
}
533
+
534
+ ; =========================================================================
535
+ ;
536
+ ; Test-cases for square root
537
+ ;
538
+ ; =========================================================================
539
+
540
+ ; A squared factor fed into a square root intrinsic should be hoisted out
541
+ ; as a fabs() value.
542
+ ; We have to rely on a function-level attribute to enable this optimization
543
+ ; because intrinsics don't currently have access to IR-level fast-math
544
+ ; flags. If that changes, we can relax the requirement on all of these
545
+ ; tests to just specify 'fast' on the sqrt.
546
+
547
+ attributes #0 = { "unsafe-fp-math" = "true" }
548
+
549
+ declare double @llvm.sqrt.f64 (double )
550
+
551
+ ; sqrt(x * x) through the intrinsic: the expected output replaces the multiply and the sqrt with one llvm.fabs.f64 call on %x.
+ define double @sqrt_intrinsic_arg_squared (double %x ) #0 {
552
+ %mul = fmul fast double %x , %x
553
+ %sqrt = call double @llvm.sqrt.f64 (double %mul )
554
+ ret double %sqrt
555
+
556
+ ; CHECK-LABEL: sqrt_intrinsic_arg_squared(
557
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
558
+ ; CHECK-NEXT: ret double %fabs
559
+ }
560
+
561
+ ; Check all 6 combinations of a 3-way multiplication tree where
562
+ ; one factor is repeated.
563
+
564
+ ; Operand order (y * x) * x: the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args1 (double %x , double %y ) #0 {
565
+ %mul = fmul fast double %y , %x
566
+ %mul2 = fmul fast double %mul , %x
567
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
568
+ ret double %sqrt
569
+
570
+ ; CHECK-LABEL: sqrt_intrinsic_three_args1(
571
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
572
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
573
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
574
+ ; CHECK-NEXT: ret double %1
575
+ }
576
+
577
+ ; Operand order (x * y) * x: the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args2 (double %x , double %y ) #0 {
578
+ %mul = fmul fast double %x , %y
579
+ %mul2 = fmul fast double %mul , %x
580
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
581
+ ret double %sqrt
582
+
583
+ ; CHECK-LABEL: sqrt_intrinsic_three_args2(
584
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
585
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
586
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
587
+ ; CHECK-NEXT: ret double %1
588
+ }
589
+
590
+ ; Operand order (x * x) * y: the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args3 (double %x , double %y ) #0 {
591
+ %mul = fmul fast double %x , %x
592
+ %mul2 = fmul fast double %mul , %y
593
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
594
+ ret double %sqrt
595
+
596
+ ; CHECK-LABEL: sqrt_intrinsic_three_args3(
597
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
598
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
599
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
600
+ ; CHECK-NEXT: ret double %1
601
+ }
602
+
603
+ ; Operand order x * (y * x): the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args4 (double %x , double %y ) #0 {
604
+ %mul = fmul fast double %y , %x
605
+ %mul2 = fmul fast double %x , %mul
606
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
607
+ ret double %sqrt
608
+
609
+ ; CHECK-LABEL: sqrt_intrinsic_three_args4(
610
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
611
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
612
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
613
+ ; CHECK-NEXT: ret double %1
614
+ }
615
+
616
+ ; Operand order x * (x * y): the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args5 (double %x , double %y ) #0 {
617
+ %mul = fmul fast double %x , %y
618
+ %mul2 = fmul fast double %x , %mul
619
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
620
+ ret double %sqrt
621
+
622
+ ; CHECK-LABEL: sqrt_intrinsic_three_args5(
623
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
624
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
625
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
626
+ ; CHECK-NEXT: ret double %1
627
+ }
628
+
629
+ ; Operand order y * (x * x): the expected output is fabs(x) * sqrt(y).
+ define double @sqrt_intrinsic_three_args6 (double %x , double %y ) #0 {
630
+ %mul = fmul fast double %x , %x
631
+ %mul2 = fmul fast double %y , %mul
632
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
633
+ ret double %sqrt
634
+
635
+ ; CHECK-LABEL: sqrt_intrinsic_three_args6(
636
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
637
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
638
+ ; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
639
+ ; CHECK-NEXT: ret double %1
640
+ }
641
+
642
+ ; Fourth power, (x * x) * (x * x): the expected output drops the sqrt and returns x * x directly, with no fabs call.
+ define double @sqrt_intrinsic_arg_4th (double %x ) #0 {
643
+ %mul = fmul fast double %x , %x
644
+ %mul2 = fmul fast double %mul , %mul
645
+ %sqrt = call double @llvm.sqrt.f64 (double %mul2 )
646
+ ret double %sqrt
647
+
648
+ ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
649
+ ; CHECK-NEXT: %mul = fmul fast double %x, %x
650
+ ; CHECK-NEXT: ret double %mul
651
+ }
652
+
653
+ ; Fifth power, ((x * x) * x) * (x * x): the expected output is (x * x) * sqrt(x).
+ define double @sqrt_intrinsic_arg_5th (double %x ) #0 {
654
+ %mul = fmul fast double %x , %x
655
+ %mul2 = fmul fast double %mul , %x
656
+ %mul3 = fmul fast double %mul2 , %mul
657
+ %sqrt = call double @llvm.sqrt.f64 (double %mul3 )
658
+ ret double %sqrt
659
+
660
+ ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
661
+ ; CHECK-NEXT: %mul = fmul fast double %x, %x
662
+ ; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
663
+ ; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
664
+ ; CHECK-NEXT: ret double %1
665
+ }
666
+
667
+ ; Check that calls to the sqrtf, sqrt and sqrtl library functions are simplified the same way as the sqrt intrinsic.
668
+
669
+ declare float @sqrtf (float )
670
+ declare double @sqrt (double )
671
+ declare fp128 @sqrtl (fp128 )
672
+
673
+ ; sqrtf(x * x) on float: the expected output is a single llvm.fabs.f32 call on %x.
+ define float @sqrt_call_squared_f32 (float %x ) #0 {
674
+ %mul = fmul fast float %x , %x
675
+ %sqrt = call float @sqrtf (float %mul )
676
+ ret float %sqrt
677
+
678
+ ; CHECK-LABEL: sqrt_call_squared_f32(
679
+ ; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
680
+ ; CHECK-NEXT: ret float %fabs
681
+ }
682
+
683
+ ; sqrt(x * x) on double via the library function: the expected output is a single llvm.fabs.f64 call on %x.
+ define double @sqrt_call_squared_f64 (double %x ) #0 {
684
+ %mul = fmul fast double %x , %x
685
+ %sqrt = call double @sqrt (double %mul )
686
+ ret double %sqrt
687
+
688
+ ; CHECK-LABEL: sqrt_call_squared_f64(
689
+ ; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
690
+ ; CHECK-NEXT: ret double %fabs
691
+ }
692
+
693
+ ; sqrtl(x * x) on fp128: the expected output is a single llvm.fabs.f128 call on %x.
+ define fp128 @sqrt_call_squared_f128 (fp128 %x ) #0 {
694
+ %mul = fmul fast fp128 %x , %x
695
+ %sqrt = call fp128 @sqrtl (fp128 %mul )
696
+ ret fp128 %sqrt
697
+
698
+ ; CHECK-LABEL: sqrt_call_squared_f128(
699
+ ; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
700
+ ; CHECK-NEXT: ret fp128 %fabs
701
+ }
702
+
0 commit comments