@@ -69,39 +69,31 @@ define float @bazz() {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
- ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
- ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
- ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
- ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
- ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
- ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
- ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
- ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
- ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
- ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
- ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
- ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
+ ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
+ ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+ ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+ ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
- ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4) to <2 x float>*), align 16
- ; CHECK-NEXT: [[TMP12:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4) to <2 x float>*), align 16
- ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[TMP12]], [[TMP11]]
- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
- ; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP14]], [[ADD7]]
- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
- ; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP15]], [[ADD19]]
- ; CHECK-NEXT: [[TMP16:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6) to <2 x float>*), align 8
- ; CHECK-NEXT: [[TMP17:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6) to <2 x float>*), align 8
- ; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x float> [[TMP17]], [[TMP16]]
- ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP18]], i32 0
- ; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP19]], [[ADD19_1]]
- ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP18]], i32 1
- ; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP20]], [[ADD19_2]]
- ; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
- ; CHECK-NEXT: ret float [[ADD19_3]]
+ ; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
+ ; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
+ ; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
+ ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
+ ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV6]]
+ ; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
+ ; CHECK-NEXT: store float [[BIN_EXTRA5]], float* @res, align 4
+ ; CHECK-NEXT: ret float [[BIN_EXTRA5]]
;
entry:
%0 = load i32 , i32* @n , align 4
@@ -555,102 +547,84 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float
- ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[X:%.*]], align 4
- ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[CONV]]
- ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
- ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[ADD]]
+ ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
- ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX_2]], align 4
- ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP2]], [[ADD_1]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
- ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX_3]], align 4
- ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP3]], [[ADD_2]]
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
- ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX_4]], align 4
- ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP4]], [[ADD_3]]
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
- ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX_5]], align 4
- ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP5]], [[ADD_4]]
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
- ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_6]], align 4
- ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP6]], [[ADD_5]]
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
- ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX_7]], align 4
- ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP7]], [[ADD_6]]
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
- ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX_8]], align 4
- ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP8]], [[ADD_7]]
; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
- ; CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX_9]], align 4
- ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP9]], [[ADD_8]]
; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
- ; CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX_10]], align 4
- ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP10]], [[ADD_9]]
; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
- ; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX_11]], align 4
- ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP11]], [[ADD_10]]
; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
- ; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX_12]], align 4
- ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP12]], [[ADD_11]]
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
- ; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX_13]], align 4
- ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP13]], [[ADD_12]]
; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
- ; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX_14]], align 4
- ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP14]], [[ADD_13]]
; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
- ; CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX_15]], align 4
- ; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float [[TMP15]], [[ADD_14]]
; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
- ; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX_16]], align 4
- ; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float [[TMP16]], [[ADD_15]]
; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
- ; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX_17]], align 4
- ; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float [[TMP17]], [[ADD_16]]
; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
- ; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX_18]], align 4
- ; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float [[TMP18]], [[ADD_17]]
; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
- ; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX_19]], align 4
- ; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float [[TMP19]], [[ADD_18]]
; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
- ; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX_20]], align 4
- ; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float [[TMP20]], [[ADD_19]]
; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
- ; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX_21]], align 4
- ; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float [[TMP21]], [[ADD_20]]
; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
- ; CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX_22]], align 4
- ; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float [[TMP22]], [[ADD_21]]
; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
- ; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX_23]], align 4
- ; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float [[TMP23]], [[ADD_22]]
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
- ; CHECK-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX_24]], align 4
- ; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float [[TMP24]], [[ADD_23]]
; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
- ; CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX_25]], align 4
- ; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float [[TMP25]], [[ADD_24]]
; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
- ; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX_26]], align 4
- ; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float [[TMP26]], [[ADD_25]]
; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
- ; CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX_27]], align 4
- ; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float [[TMP27]], [[ADD_26]]
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
- ; CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX_28]], align 4
- ; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float [[TMP28]], [[ADD_27]]
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
- ; CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX_29]], align 4
- ; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float [[TMP29]], [[ADD_28]]
; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
- ; CHECK-NEXT: [[TMP30:%.*]] = load float, float* [[ARRAYIDX_30]], align 4
- ; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float [[TMP30]], [[ADD_29]]
; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
- ; CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX_31]], align 4
- ; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float [[TMP31]], [[ADD_30]]
- ; CHECK-NEXT: ret float [[ADD_31]]
+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+ ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+ ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+ ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+ ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+ ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+ ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+ ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+ ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+ ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+ ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+ ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+ ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+ ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
+ ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+ ; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+ ; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+ ; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+ ; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+ ; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+ ; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+ ; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+ ; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+ ; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+ ; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+ ; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+ ; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+ ; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+ ; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+ ; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+ ; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
+ ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
+ ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
+ ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+ ; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
+ ; CHECK-NEXT: ret float [[BIN_EXTRA]]
;
entry:
%rem = srem i32 %a , %b
@@ -948,32 +922,35 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
- ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[X:%.*]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
- ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[TMP0]], [[ADD]]
- ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
- ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
- ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[ADD1]]
- ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
+ ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
- ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
- ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float [[TMP2]], [[ADD5]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
- ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
- ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[TMP3]], [[ADD4_1]]
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
- ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4
- ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float [[TMP4]], [[ADD4_2]]
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
- ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX3_4]], align 4
- ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float [[TMP5]], [[ADD4_3]]
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
- ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX3_5]], align 4
- ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float [[TMP6]], [[ADD4_4]]
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
- ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX3_6]], align 4
- ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float [[TMP7]], [[ADD4_5]]
- ; CHECK-NEXT: ret float [[ADD4_6]]
+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+ ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+ ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+ ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
+ ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
+ ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+ ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+ ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
+ ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
+ ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
+ ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]]
+ ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
+ ; CHECK-NEXT: ret float [[BIN_EXTRA5]]
;
entry:
%mul = mul nsw i32 %b , %a
@@ -1011,34 +988,37 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
- ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[X:%.*]], align 4
; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
- ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[TMP0]], [[ADD]]
- ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
- ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
- ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[ADD1]]
+ ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
- ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
- ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float [[TMP2]], [[ADD4]]
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
- ; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
- ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[TMP3]], [[ADD4_1]]
; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
- ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4
- ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float [[TMP4]], [[ADD4_2]]
- ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
- ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX3_4]], align 4
- ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float [[TMP5]], [[ADD5]]
; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
- ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX3_5]], align 4
- ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float [[TMP6]], [[ADD4_4]]
; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
- ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX3_6]], align 4
- ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float [[TMP7]], [[ADD4_5]]
- ; CHECK-NEXT: ret float [[ADD4_6]]
+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+ ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+ ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+ ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
+ ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+ ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+ ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
+ ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
+ ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
+ ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
+ ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
+ ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
+ ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]]
+ ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
+ ; CHECK-NEXT: ret float [[BIN_EXTRA5]]
;
entry:
%mul = mul nsw i32 %b , %a
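All four hunks above update the expected output to the same codegen shape: instead of matching a scalar fadd chain, the new CHECK lines expect one wide vector load, a pairwise reduction that repeatedly folds the upper half of the vector onto the lower half with a shufflevector plus a fast fadd (the RDX_SHUF/BIN_RDX pairs, log2(VF) steps), an extract of lane 0, and finally the scalar operands that fed the original chain (e.g. [[CONV]], [[CONV6]]) added back in as the BIN_EXTRA values. A minimal sketch of that shape, on a hypothetical <4 x float> input rather than the <8 x float>/<32 x float> vectors in the tests above (illustration only, not part of the patch):

define float @reduce_v4f32(<4 x float> %v, float %extra) {
entry:
  ; Fold the upper two lanes onto the lower two: lanes become <v0+v2, v1+v3, ...>.
  %rdx.shuf = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = fadd fast <4 x float> %v, %rdx.shuf
  ; Fold lane 1 onto lane 0: lane 0 now holds v0+v1+v2+v3.
  %rdx.shuf1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <4 x float> %bin.rdx, %rdx.shuf1
  %scalar = extractelement <4 x float> %bin.rdx2, i32 0
  ; Scalar "extra" operands from the original chain are folded in once at the
  ; end, mirroring the BIN_EXTRA steps in the CHECK lines above.
  %bin.extra = fadd fast float %scalar, %extra
  ret float %bin.extra
}

The reassociation this reduction performs is only legal because every fadd in the chain carries fast-math flags, which is why each reduction step above is a `fadd fast`.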