# Changeset View

Changeset View

# Standalone View

Standalone View

# test/Transforms/LoopVectorize/AArch64/predication_costs.ll

Show All 10 Lines | |||||

11 | 11 | | |||

12 | ; CHECK-LABEL: predicated_udiv | 12 | ; CHECK-LABEL: predicated_udiv | ||

13 | ; | 13 | ; | ||

14 | ; This test checks that we correctly compute the cost of the predicated udiv | 14 | ; This test checks that we correctly compute the cost of the predicated udiv | ||

15 | ; instruction. If we assume the block probability is 50%, we compute the cost | 15 | ; instruction. If we assume the block probability is 50%, we compute the cost | ||

16 | ; as: | 16 | ; as: | ||

17 | ; | 17 | ; | ||

18 | ; Cost of udiv: | 18 | ; Cost of udiv: | ||

19 | ; (udiv(2) + extractelement(6) + insertelement(3)) / 2 = 5 | 19 | ; (udiv(2) + extractelement(12) + insertelement(6)) / 2 = 10 | ||

20 | ; | 20 | ; | ||

21 | ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 | 21 | ; CHECK: Found an estimated cost of 10 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 | ||

22 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 | 22 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 | ||

23 | ; | 23 | ; | ||

24 | define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { | 24 | define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { | ||

25 | entry: | 25 | entry: | ||

26 | br label %for.body | 26 | br label %for.body | ||

27 | 27 | | |||

28 | for.body: | 28 | for.body: | ||

29 | %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] | 29 | %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] | ||

Show All 22 Lines | |||||

52 | 52 | | |||

53 | ; CHECK-LABEL: predicated_store | 53 | ; CHECK-LABEL: predicated_store | ||

54 | ; | 54 | ; | ||

55 | ; This test checks that we correctly compute the cost of the predicated store | 55 | ; This test checks that we correctly compute the cost of the predicated store | ||

56 | ; instruction. If we assume the block probability is 50%, we compute the cost | 56 | ; instruction. If we assume the block probability is 50%, we compute the cost | ||

57 | ; as: | 57 | ; as: | ||

58 | ; | 58 | ; | ||

59 | ; Cost of store: | 59 | ; Cost of store: | ||

60 | ; (store(4) + extractelement(3)) / 2 = 3 | 60 | ; (store(4) + extractelement(6)) / 2 = 5 | ||

61 | ; | 61 | ; | ||

62 | ; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 | 62 | ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 | ||

63 | ; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 | 63 | ; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 | ||

64 | ; | 64 | ; | ||

65 | define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) { | 65 | define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) { | ||

66 | entry: | 66 | entry: | ||

67 | br label %for.body | 67 | br label %for.body | ||

68 | 68 | | |||

69 | for.body: | 69 | for.body: | ||

70 | %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] | 70 | %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] | ||

Show All 18 Lines | |||||

89 | ; CHECK-LABEL: predicated_udiv_scalarized_operand | 89 | ; CHECK-LABEL: predicated_udiv_scalarized_operand | ||

90 | ; | 90 | ; | ||

91 | ; This test checks that we correctly compute the cost of the predicated udiv | 91 | ; This test checks that we correctly compute the cost of the predicated udiv | ||

92 | ; instruction and the add instruction it uses. The add is scalarized and sunk | 92 | ; instruction and the add instruction it uses. The add is scalarized and sunk | ||

93 | ; inside the predicated block. If we assume the block probability is 50%, we | 93 | ; inside the predicated block. If we assume the block probability is 50%, we | ||

94 | ; compute the cost as: | 94 | ; compute the cost as: | ||

95 | ; | 95 | ; | ||

96 | ; Cost of add: | 96 | ; Cost of add: | ||

97 | ; (add(2) + extractelement(3)) / 2 = 2 | 97 | ; (add(2) + extractelement(6)) / 2 = 4 | ||

98 | ; Cost of udiv: | 98 | ; Cost of udiv: | ||

99 | ; (udiv(2) + extractelement(3) + insertelement(3)) / 2 = 4 | 99 | ; (udiv(2) + extractelement(6) + insertelement(6)) / 2 = 7 | ||

100 | ; | 100 | ; | ||

101 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x | 101 | ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x | ||

102 | ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 | 102 | ; CHECK: Found an estimated cost of 7 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 | ||

103 | ; CHECK: Scalarizing: %tmp3 = add nsw i32 %tmp2, %x | 103 | ; CHECK: Scalarizing: %tmp3 = add nsw i32 %tmp2, %x | ||

104 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 | 104 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 | ||

105 | ; | 105 | ; | ||

106 | define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { | 106 | define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { | ||

107 | entry: | 107 | entry: | ||

108 | br label %for.body | 108 | br label %for.body | ||

109 | 109 | | |||

110 | for.body: | 110 | for.body: | ||

Show All 23 Lines | |||||

134 | ; CHECK-LABEL: predicated_store_scalarized_operand | 134 | ; CHECK-LABEL: predicated_store_scalarized_operand | ||

135 | ; | 135 | ; | ||

136 | ; This test checks that we correctly compute the cost of the predicated store | 136 | ; This test checks that we correctly compute the cost of the predicated store | ||

137 | ; instruction and the add instruction it uses. The add is scalarized and sunk | 137 | ; instruction and the add instruction it uses. The add is scalarized and sunk | ||

138 | ; inside the predicated block. If we assume the block probability is 50%, we | 138 | ; inside the predicated block. If we assume the block probability is 50%, we | ||

139 | ; compute the cost as: | 139 | ; compute the cost as: | ||

140 | ; | 140 | ; | ||

141 | ; Cost of add: | 141 | ; Cost of add: | ||

142 | ; (add(2) + extractelement(3)) / 2 = 2 | 142 | ; (add(2) + extractelement(6)) / 2 = 4 | ||

143 | ; Cost of store: | 143 | ; Cost of store: | ||

144 | ; store(4) / 2 = 2 | 144 | ; store(4) / 2 = 2 | ||

145 | ; | 145 | ; | ||

146 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x | 146 | ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x | ||

147 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 | 147 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 | ||

148 | ; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x | 148 | ; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x | ||

149 | ; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 | 149 | ; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 | ||

150 | ; | 150 | ; | ||

151 | define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { | 151 | define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { | ||

152 | entry: | 152 | entry: | ||

153 | br label %for.body | 153 | br label %for.body | ||

154 | 154 | | |||

Show All 24 Lines | |||||

179 | ; and predicated. The sub feeding the store is scalarized and sunk inside the | 179 | ; and predicated. The sub feeding the store is scalarized and sunk inside the | ||

180 | ; store's predicated block. However, the add feeding the sdiv and udiv cannot | 180 | ; store's predicated block. However, the add feeding the sdiv and udiv cannot | ||

181 | ; be sunk and is not scalarized. If we assume the block probability is 50%, we | 181 | ; be sunk and is not scalarized. If we assume the block probability is 50%, we | ||

182 | ; compute the cost as: | 182 | ; compute the cost as: | ||

183 | ; | 183 | ; | ||

184 | ; Cost of add: | 184 | ; Cost of add: | ||

185 | ; add(1) = 1 | 185 | ; add(1) = 1 | ||

186 | ; Cost of sdiv: | 186 | ; Cost of sdiv: | ||

187 | ; (sdiv(2) + extractelement(6) + insertelement(3)) / 2 = 5 | 187 | ; (sdiv(2) + extractelement(12) + insertelement(6)) / 2 = 10 | ||

188 | ; Cost of udiv: | 188 | ; Cost of udiv: | ||

189 | ; (udiv(2) + extractelement(6) + insertelement(3)) / 2 = 5 | 189 | ; (udiv(2) + extractelement(12) + insertelement(6)) / 2 = 10 | ||

190 | ; Cost of sub: | 190 | ; Cost of sub: | ||

191 | ; (sub(2) + extractelement(3)) / 2 = 2 | 191 | ; (sub(2) + extractelement(6)) / 2 = 4 | ||

192 | ; Cost of store: | 192 | ; Cost of store: | ||

193 | ; store(4) / 2 = 2 | 193 | ; store(4) / 2 = 2 | ||

194 | ; | 194 | ; | ||

195 | ; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x | 195 | ; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x | ||

196 | ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 | 196 | ; CHECK: Found an estimated cost of 10 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 | ||

197 | ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2 | 197 | ; CHECK: Found an estimated cost of 10 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2 | ||

198 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x | 198 | ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x | ||

199 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4 | 199 | ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4 | ||

200 | ; CHECK-NOT: Scalarizing: %tmp2 = add i32 %tmp1, %x | 200 | ; CHECK-NOT: Scalarizing: %tmp2 = add i32 %tmp1, %x | ||

201 | ; CHECK: Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2 | 201 | ; CHECK: Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2 | ||

202 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2 | 202 | ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2 | ||

203 | ; CHECK: Scalarizing: %tmp5 = sub i32 %tmp4, %x | 203 | ; CHECK: Scalarizing: %tmp5 = sub i32 %tmp4, %x | ||

204 | ; CHECK: Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4 | 204 | ; CHECK: Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4 | ||

205 | ; | 205 | ; | ||

206 | define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) { | 206 | define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) { | ||

Show All 25 Lines |