@@ -338,6 +338,7 @@ namespace {
338
338
unsigned HiOp);
339
339
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
340
340
SDValue CombineExtLoad(SDNode *N);
341
+ SDValue combineRepeatedFPDivisors(SDNode *N);
341
342
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
342
343
SDValue BuildSDIV(SDNode *N);
343
344
SDValue BuildSDIVPow2(SDNode *N);
@@ -8235,6 +8236,60 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
8235
8236
return SDValue();
8236
8237
}
8237
8238
8239
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8240
+ // reciprocal.
8241
+ // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
8242
+ // Notice that this is not always beneficial. One reason is different target
8243
+ // may have different costs for FDIV and FMUL, so sometimes the cost of two
8244
+ // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
8245
+ // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
8246
+ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
8247
+ if (!DAG.getTarget().Options.UnsafeFPMath)
8248
+ return SDValue();
8249
+
8250
+ SDValue N0 = N->getOperand(0);
8251
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8252
+
8253
+ // Skip if current node is a reciprocal.
8254
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
8255
+ return SDValue();
8256
+
8257
+ SDValue N1 = N->getOperand(1);
8258
+ SmallVector<SDNode *, 4> Users;
8259
+
8260
+ // Find all FDIV users of the same divisor.
8261
+ for (auto *U : N1->uses()) {
8262
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
8263
+ Users.push_back(U);
8264
+ }
8265
+
8266
+ if (!TLI.combineRepeatedFPDivisors(Users.size()))
8267
+ return SDValue();
8268
+
8269
+ EVT VT = N->getValueType(0);
8270
+ SDLoc DL(N);
8271
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
8272
+ // FIXME: This optimization requires some level of fast-math, so the
8273
+ // created reciprocal node should at least have the 'allowReciprocal'
8274
+ // fast-math-flag set.
8275
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
8276
+
8277
+ // Dividend / Divisor -> Dividend * Reciprocal
8278
+ for (auto *U : Users) {
8279
+ SDValue Dividend = U->getOperand(0);
8280
+ if (Dividend != FPOne) {
8281
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
8282
+ Reciprocal);
8283
+ CombineTo(U, NewNode);
8284
+ } else if (U != Reciprocal.getNode()) {
8285
+ // In the absence of fast-math-flags, this user node is always the
8286
+ // same node as Reciprocal, but with FMF they may be different nodes.
8287
+ CombineTo(U, Reciprocal);
8288
+ }
8289
+ }
8290
+ return SDValue(N, 0); // N was replaced.
8291
+ }
8292
+
8238
8293
SDValue DAGCombiner::visitFDIV(SDNode *N) {
8239
8294
SDValue N0 = N->getOperand(0);
8240
8295
SDValue N1 = N->getOperand(1);
@@ -8335,48 +8390,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
8335
8390
}
8336
8391
}
8337
8392
8338
- // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8339
- // reciprocal.
8340
- // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
8341
- // Notice that this is not always beneficial. One reason is different target
8342
- // may have different costs for FDIV and FMUL, so sometimes the cost of two
8343
- // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
8344
- // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
8345
- if (Options.UnsafeFPMath) {
8346
- // Skip if current node is a reciprocal.
8347
- if (N0CFP && N0CFP->isExactlyValue(1.0))
8348
- return SDValue();
8349
-
8350
- SmallVector<SDNode *, 4> Users;
8351
- // Find all FDIV users of the same divisor.
8352
- for (auto *U : N1->uses()) {
8353
- if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
8354
- Users.push_back(U);
8355
- }
8356
-
8357
- if (TLI.combineRepeatedFPDivisors(Users.size())) {
8358
- SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
8359
- // FIXME: This optimization requires some level of fast-math, so the
8360
- // created reciprocal node should at least have the 'allowReciprocal'
8361
- // fast-math-flag set.
8362
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
8363
-
8364
- // Dividend / Divisor -> Dividend * Reciprocal
8365
- for (auto *U : Users) {
8366
- SDValue Dividend = U->getOperand(0);
8367
- if (Dividend != FPOne) {
8368
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
8369
- Reciprocal);
8370
- CombineTo(U, NewNode);
8371
- } else if (U != Reciprocal.getNode()) {
8372
- // In the absence of fast-math-flags, this user node is always the
8373
- // same node as Reciprocal, but with FMF they may be different nodes.
8374
- CombineTo(U, Reciprocal);
8375
- }
8376
- }
8377
- return SDValue(N, 0); // N was replaced.
8378
- }
8379
- }
8393
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
8394
+ return CombineRepeatedDivisors;
8380
8395
8381
8396
return SDValue();
8382
8397
}
0 commit comments