Skip to content

Commit bddb8c3

Browse files
committedJul 3, 2019
[DAGCombine] More diamong carry pattern optimization.
Summary: This diff improve the capability of DAGCOmbine to generate linear carries propagation in presence of a diamond pattern. It is now able to match a large variety of different patterns rather than some hardcoded one. Arguably, the codegen in test cases is not better, but this is to be expected. The goal of this transformation is more about canonicalisation than actual optimisation. Reviewers: hfinkel, RKSimon, craig.topper Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D57302 llvm-svn: 365051
1 parent 783dbe4 commit bddb8c3

File tree

3 files changed

+117
-44
lines changed

3 files changed

+117
-44
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+92-27
Original file line numberDiff line numberDiff line change
@@ -2875,6 +2875,93 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
28752875
return SDValue();
28762876
}
28772877

2878+
/**
2879+
* If we are facing some sort of diamond carry propapagtion pattern try to
2880+
* break it up to generate something like:
2881+
* (addcarry X, 0, (addcarry A, B, Z):Carry)
2882+
*
2883+
* The end result is usually an increase in operation required, but because the
2884+
* carry is now linearized, other tranforms can kick in and optimize the DAG.
2885+
*
2886+
* Patterns typically look something like
2887+
* (uaddo A, B)
2888+
* / \
2889+
* Carry Sum
2890+
* | \
2891+
* | (addcarry *, 0, Z)
2892+
* | /
2893+
* \ Carry
2894+
* | /
2895+
* (addcarry X, *, *)
2896+
*
2897+
* But numerous variation exist. Our goal is to identify A, B, X and Z and
2898+
* produce a combine with a single path for carry propagation.
2899+
*/
2900+
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2901+
SDValue X, SDValue Carry0, SDValue Carry1,
2902+
SDNode *N) {
2903+
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2904+
return SDValue();
2905+
if (Carry1.getOpcode() != ISD::UADDO)
2906+
return SDValue();
2907+
2908+
SDValue Z;
2909+
2910+
/**
2911+
* First look for a suitable Z. It will present itself in the form of
2912+
* (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2913+
*/
2914+
if (Carry0.getOpcode() == ISD::ADDCARRY &&
2915+
isNullConstant(Carry0.getOperand(1))) {
2916+
Z = Carry0.getOperand(2);
2917+
} else if (Carry0.getOpcode() == ISD::UADDO &&
2918+
isOneConstant(Carry0.getOperand(1))) {
2919+
EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2920+
Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2921+
} else {
2922+
// We couldn't find a suitable Z.
2923+
return SDValue();
2924+
}
2925+
2926+
2927+
auto cancelDiamond = [&](SDValue A,SDValue B) {
2928+
SDLoc DL(N);
2929+
SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2930+
Combiner.AddToWorklist(NewY.getNode());
2931+
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2932+
DAG.getConstant(0, DL, X.getValueType()),
2933+
NewY.getValue(1));
2934+
};
2935+
2936+
/**
2937+
* (uaddo A, B)
2938+
* |
2939+
* Sum
2940+
* |
2941+
* (addcarry *, 0, Z)
2942+
*/
2943+
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2944+
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2945+
}
2946+
2947+
/**
2948+
* (addcarry A, 0, Z)
2949+
* |
2950+
* Sum
2951+
* |
2952+
* (uaddo *, B)
2953+
*/
2954+
if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2955+
return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2956+
}
2957+
2958+
if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2959+
return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2960+
}
2961+
2962+
return SDValue();
2963+
}
2964+
28782965
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
28792966
SDNode *N) {
28802967
// Iff the flag result is dead:
@@ -2889,35 +2976,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
28892976
* When one of the addcarry argument is itself a carry, we may be facing
28902977
* a diamond carry propagation. In which case we try to transform the DAG
28912978
* to ensure linear carry propagation if that is possible.
2892-
*
2893-
* We are trying to get:
2894-
* (addcarry X, 0, (addcarry A, B, Z):Carry)
28952979
*/
28962980
if (auto Y = getAsCarry(TLI, N1)) {
2897-
/**
2898-
* (uaddo A, B)
2899-
* / \
2900-
* Carry Sum
2901-
* | \
2902-
* | (addcarry *, 0, Z)
2903-
* | /
2904-
* \ Carry
2905-
* | /
2906-
* (addcarry X, *, *)
2907-
*/
2908-
if (Y.getOpcode() == ISD::UADDO &&
2909-
CarryIn.getResNo() == 1 &&
2910-
CarryIn.getOpcode() == ISD::ADDCARRY &&
2911-
isNullConstant(CarryIn.getOperand(1)) &&
2912-
CarryIn.getOperand(0) == Y.getValue(0)) {
2913-
auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2914-
Y.getOperand(0), Y.getOperand(1),
2915-
CarryIn.getOperand(2));
2916-
AddToWorklist(NewY.getNode());
2917-
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2918-
DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2919-
NewY.getValue(1));
2920-
}
2981+
// Because both are carries, Y and Z can be swapped.
2982+
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2983+
return R;
2984+
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
2985+
return R;
29212986
}
29222987

29232988
return SDValue();

‎llvm/test/CodeGen/X86/addcarry.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -390,13 +390,13 @@ define i128 @addcarry1_not(i128 %n) {
390390
define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
391391
; CHECK-LABEL: addcarry_to_subcarry:
392392
; CHECK: # %bb.0:
393+
; CHECK-NEXT: movq %rdi, %rax
393394
; CHECK-NEXT: notq %rsi
394-
; CHECK-NEXT: xorl %eax, %eax
395+
; CHECK-NEXT: movb $1, %cl
396+
; CHECK-NEXT: addb $-1, %cl
395397
; CHECK-NEXT: movq %rdi, %rcx
396-
; CHECK-NEXT: addq %rsi, %rcx
397-
; CHECK-NEXT: setb %al
398-
; CHECK-NEXT: addq $1, %rcx
399-
; CHECK-NEXT: adcq %rdi, %rax
398+
; CHECK-NEXT: adcq %rsi, %rcx
399+
; CHECK-NEXT: adcq $0, %rax
400400
; CHECK-NEXT: setb %cl
401401
; CHECK-NEXT: movzbl %cl, %edx
402402
; CHECK-NEXT: addq %rsi, %rax

‎llvm/test/CodeGen/X86/subcarry.ll

+20-12
Original file line numberDiff line numberDiff line change
@@ -90,29 +90,37 @@ entry:
9090
define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
9191
; CHECK-LABEL: sub:
9292
; CHECK: # %bb.0: # %entry
93+
; CHECK-NEXT: pushq %rbx
94+
; CHECK-NEXT: .cfi_def_cfa_offset 16
95+
; CHECK-NEXT: .cfi_offset %rbx, -16
9396
; CHECK-NEXT: movq %rdi, %rax
97+
; CHECK-NEXT: movq (%rsi), %r10
98+
; CHECK-NEXT: movq 8(%rsi), %rdi
99+
; CHECK-NEXT: movq %r10, %r11
100+
; CHECK-NEXT: subq %rdx, %r11
94101
; CHECK-NEXT: notq %rdx
95-
; CHECK-NEXT: xorl %edi, %edi
96-
; CHECK-NEXT: addq (%rsi), %rdx
97-
; CHECK-NEXT: setb %dil
98-
; CHECK-NEXT: addq $1, %rdx
99-
; CHECK-NEXT: adcq 8(%rsi), %rdi
100-
; CHECK-NEXT: setb %r10b
101-
; CHECK-NEXT: movzbl %r10b, %r10d
102+
; CHECK-NEXT: movb $1, %bl
103+
; CHECK-NEXT: addb $-1, %bl
104+
; CHECK-NEXT: adcq %r10, %rdx
105+
; CHECK-NEXT: adcq $0, %rdi
106+
; CHECK-NEXT: setb %dl
107+
; CHECK-NEXT: movzbl %dl, %edx
102108
; CHECK-NEXT: notq %rcx
103109
; CHECK-NEXT: addq %rdi, %rcx
104-
; CHECK-NEXT: adcq 16(%rsi), %r10
105-
; CHECK-NEXT: setb %dil
106-
; CHECK-NEXT: movzbl %dil, %edi
110+
; CHECK-NEXT: adcq 16(%rsi), %rdx
111+
; CHECK-NEXT: setb %bl
112+
; CHECK-NEXT: movzbl %bl, %edi
107113
; CHECK-NEXT: notq %r8
108-
; CHECK-NEXT: addq %r10, %r8
114+
; CHECK-NEXT: addq %rdx, %r8
109115
; CHECK-NEXT: adcq 24(%rsi), %rdi
110116
; CHECK-NEXT: notq %r9
111117
; CHECK-NEXT: addq %rdi, %r9
112-
; CHECK-NEXT: movq %rdx, (%rax)
118+
; CHECK-NEXT: movq %r11, (%rax)
113119
; CHECK-NEXT: movq %rcx, 8(%rax)
114120
; CHECK-NEXT: movq %r8, 16(%rax)
115121
; CHECK-NEXT: movq %r9, 24(%rax)
122+
; CHECK-NEXT: popq %rbx
123+
; CHECK-NEXT: .cfi_def_cfa_offset 8
116124
; CHECK-NEXT: retq
117125
entry:
118126
%0 = extractvalue %S %arg.b, 0

0 commit comments

Comments
 (0)