Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2225,6 +2225,7 @@ /// the {load; op; store} to modify transformation. static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, SDValue StoredVal, SelectionDAG *CurDAG, + unsigned LoadOpNo, LoadSDNode *&LoadNode, SDValue &InputChain) { // Is the stored value result 0 of the operation? @@ -2237,7 +2238,7 @@ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) return false; - SDValue Load = StoredVal->getOperand(0); + SDValue Load = StoredVal->getOperand(LoadOpNo); // Is the stored value a non-extending and non-indexed load? if (!ISD::isNormalLoad(Load.getNode())) return false; @@ -2366,26 +2367,39 @@ if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && MemVT != MVT::i8) return false; + + bool IsCommutable = false; switch (Opc) { default: return false; case X86ISD::INC: case X86ISD::DEC: - case X86ISD::ADD: - case X86ISD::ADC: case X86ISD::SUB: case X86ISD::SBB: + break; + case X86ISD::ADD: + case X86ISD::ADC: case X86ISD::AND: case X86ISD::OR: case X86ISD::XOR: + IsCommutable = true; break; } + unsigned LoadOpNo = 0; LoadSDNode *LoadNode = nullptr; SDValue InputChain; - if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode, - InputChain)) - return false; + if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, + LoadNode, InputChain)) { + if (!IsCommutable) + return false; + + // This operation is commutable, try the other operand. + LoadOpNo = 1; + if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, + LoadNode, InputChain)) + return false; + } SDValue Base, Scale, Index, Disp, Segment; if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, @@ -2503,7 +2517,7 @@ }; unsigned NewOpc = SelectRegOpcode(Opc); - SDValue Operand = StoredVal->getOperand(1); + SDValue Operand = StoredVal->getOperand(1-LoadOpNo); // See if the operand is a constant that we can fold into an immediate // operand. Index: llvm/trunk/test/CodeGen/X86/addcarry.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/addcarry.ll +++ llvm/trunk/test/CodeGen/X86/addcarry.ll @@ -30,9 +30,8 @@ define void @add128_rmw2(i128 %a, i128* %b) nounwind { ; CHECK-LABEL: add128_rmw2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq (%rdx), %rdi +; CHECK-NEXT: addq %rdi, (%rdx) ; CHECK-NEXT: adcq %rsi, 8(%rdx) -; CHECK-NEXT: movq %rdi, (%rdx) ; CHECK-NEXT: retq entry: %0 = load i128, i128* %b @@ -77,13 +76,10 @@ define void @add256_rmw2(i256 %a, i256* %b) nounwind { ; CHECK-LABEL: add256_rmw2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq (%r8), %rdi -; CHECK-NEXT: adcq 8(%r8), %rsi -; CHECK-NEXT: adcq 16(%r8), %rdx +; CHECK-NEXT: addq %rdi, (%r8) +; CHECK-NEXT: adcq %rsi, 8(%r8) +; CHECK-NEXT: adcq %rdx, 16(%r8) ; CHECK-NEXT: adcq %rcx, 24(%r8) -; CHECK-NEXT: movq %rdi, (%r8) -; CHECK-NEXT: movq %rsi, 8(%r8) -; CHECK-NEXT: movq %rdx, 16(%r8) ; CHECK-NEXT: retq entry: %0 = load i256, i256* %b