diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19453,6 +19453,23 @@
   return true;
 }
 
+static bool hasGluedInputChain(SDNode *N) {
+  auto LastOpNo = N->getNumOperands() - 1;
+  bool hasChain = N->getOperand(0).getValueType() == MVT::Other;
+  bool hasInputGlue = N->getOperand(LastOpNo).getValueType() == MVT::Glue;
+  return hasChain && hasInputGlue &&
+         (N->getOperand(0).getNode() == N->getOperand(LastOpNo).getNode());
+}
+
+static bool hasGluedOutput(SDNode *N) {
+  if (N->getNumValues() <= 1)
+    return false;
+  auto LastValNo = N->getNumValues() - 1;
+  bool hasChainOut = N->getValueType(LastValNo - 1) == MVT::Other;
+  bool hasGlueOut = N->getValueType(LastValNo) == MVT::Glue;
+  return hasChainOut && hasGlueOut;
+}
+
 /// Walk up chain skipping non-aliasing memory nodes,
 /// looking for aliasing nodes and adding them to the Aliases vector.
 void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
@@ -19469,12 +19486,106 @@
   Chains.push_back(OriginalChain);
   unsigned Depth = 0;
 
+  // Attempt to improve chain by a single step
+  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+    switch (C.getOpcode()) {
+    case ISD::EntryToken:
+      // No need to mark EntryToken.
+      C = SDValue();
+      return true;
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for C.
+      auto *LSChain = cast<LSBaseSDNode>(C.getNode());
+      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && !LSChain->isVolatile();
+      if ((IsLoad && IsOpLoad) || !isAlias(N, LSChain)) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      // Alias, so stop here.
+      return false;
+    }
+
+    case ISD::CopyFromReg:
+      // If this is glued, recursively check chain to see if we can skip the
+      // full glued chain.
+      if (hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      // Unglued node can always be skipped.
+      C = C.getOperand(0);
+      return true;
+
+    case ISD::CopyToReg: {
+      if (hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+
+      // CopyToReg is used to introduce values into the DAG. Forwarding past
+      // these nodes appears to violate some assumption in the DAG logic. As
+      // such, we avoid bypassing unglued CopyToReg nodes.
+
+      // Succeed if we're at the top of a glued region.
+      if (hasGluedOutput(C.getNode())) {
+        C = C.getOperand(0);
+        return true;
+      }
+      return false;
+    }
+
+    case ISD::INLINEASM: {
+      auto *ExtraInfo =
+          cast<ConstantSDNode>(C->getOperand(InlineAsm::Op_ExtraInfo));
+      unsigned EIInt = ExtraInfo->getZExtValue();
+      bool IsSafe = !(EIInt & InlineAsm::Extra_MayStore);
+      if (!IsLoad)
+        IsSafe = IsSafe && !(EIInt & (InlineAsm::Extra_MayLoad |
+                                      InlineAsm::Extra_HasSideEffects));
+      if (IsSafe && hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      return false;
+    }
+
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END: {
+      // We can forward past any lifetime start/end that can be proven not to
+      // alias the memory access.
+      const auto *Lifetime = cast<LifetimeSDNode>(C);
+      if (!Lifetime->hasOffset())
+        return false; // Be conservative if we don't know the extents of the object.
+
+      const BaseIndexOffset LifetimePtr(Lifetime->getOperand(1), SDValue(),
+                                        Lifetime->getOffset(), false);
+      bool IsAlias;
+      if (BaseIndexOffset::computeAliasing(LifetimePtr, Lifetime->getSize(),
+                                           LSBasePtr, LSNumBytes, DAG,
+                                           IsAlias) && !IsAlias) {
+        C = C.getOperand(0);
+        return true;
+      }
+      return false;
+    }
+
+    default:
+      return false;
+    }
+  };
+
   // Look at each chain and determine if it is an alias. If so, add it to the
   // aliases list. If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.pop_back_val();
 
+    // Don't bother if we've seen Chain before.
+    if (!Visited.insert(Chain.getNode()).second)
+      continue;
+
     // For TokenFactor nodes, look at each operand and only continue up the
     // chain until we reach the depth limit.
     //
@@ -19487,83 +19598,30 @@
       return;
     }
 
-    // Don't bother if we've been before.
-    if (!Visited.insert(Chain.getNode()).second)
-      continue;
-
-    switch (Chain.getOpcode()) {
-    case ISD::EntryToken:
-      // Entry token is ideal chain operand, but handled in FindBetterChain.
-      break;
-
-    case ISD::LOAD:
-    case ISD::STORE: {
-      // Get alias information for Chain.
-      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
-                      !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
-      // If chain is alias then stop here.
-      if (!(IsLoad && IsOpLoad) &&
-          isAlias(N, cast<LSBaseSDNode>(Chain.getNode()))) {
-        Aliases.push_back(Chain);
-      } else {
-        // Look further up the chain.
-        Chains.push_back(Chain.getOperand(0));
-        ++Depth;
-      }
-      break;
-    }
-
-    case ISD::TokenFactor:
+    if (Chain.getOpcode() == ISD::TokenFactor) {
       // We have to check each of the operands of the token factor for "small"
       // token factors, so we queue them up. Adding the operands to the queue
       // (stack) in reverse order maintains the original order and increases the
       // likelihood that getNode will find a matching token factor (CSE.)
       if (Chain.getNumOperands() > 16) {
         Aliases.push_back(Chain);
-        break;
+        continue;
       }
       for (unsigned n = Chain.getNumOperands(); n;)
         Chains.push_back(Chain.getOperand(--n));
       ++Depth;
-      break;
-
-    case ISD::CopyFromReg:
-      // Forward past CopyFromReg.
-      Chains.push_back(Chain.getOperand(0));
-      ++Depth;
-      break;
-
-    case ISD::LIFETIME_START:
-    case ISD::LIFETIME_END: {
-      // We can forward past any lifetime start/end that can be proven not to
-      // alias the memory access.
-      const auto *Lifetime = cast<LifetimeSDNode>(Chain);
-      if (!Lifetime->hasOffset()) {
-        Aliases.push_back(Chain);
-        break; // Be conservative if we don't know the extents of the object.
-      }
-
-      const BaseIndexOffset LifetimePtr(Lifetime->getOperand(1), SDValue(),
-                                        Lifetime->getOffset(), false);
-      bool IsAlias;
-      if (BaseIndexOffset::computeAliasing(LifetimePtr, Lifetime->getSize(),
-                                           LSBasePtr, LSNumBytes, DAG,
-                                           IsAlias) &&
-          !IsAlias) {
-        Chains.push_back(Chain.getOperand(0));
-        ++Depth;
-      } else {
-        Aliases.push_back(Chain);
-      }
-      break;
+      continue;
     }
-
-    default:
-      // For all other instructions we will just have to take what we can get.
-      Aliases.push_back(Chain);
-      break;
+    // Everything else
+    if (ImproveChain(Chain)) {
+      // Updated Chain Found, Consider new chain if one exists.
+      if (Chain.getNode())
+        Chains.push_back(Chain);
+      ++Depth;
+      continue;
     }
+    // No Improved Chain Possible, treat as Alias.
+    Aliases.push_back(Chain);
   }
 }
 
diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll
--- a/llvm/test/CodeGen/Thumb/frame-access.ll
+++ b/llvm/test/CodeGen/Thumb/frame-access.ll
@@ -280,9 +280,9 @@
 ; CHECK-NEXT: add r2, sp, #12
 ; CHECK-NEXT: bl h
 ; Load `x`, `y`, and `z` via SP
-; CHECK: ldr r1, [sp, #20]
-; CHECK-NEXT: ldr r2, [sp, #16]
-; CHECK-NEXT: ldr r3, [sp, #12]
+; CHECK-DAG: ldr r1, [sp, #20]
+; CHECK-DAG: ldr r2, [sp, #16]
+; CHECK-DAG: ldr r3, [sp, #12]
 ; CHECK: bl g
 
 ; Re-aligned stack, access via SP.
@@ -324,9 +324,9 @@
 ; CHECK-NEXT: add r2, sp, #20
 ; CHECK-NEXT: bl h
 ; Load `x`, `y`, and `z` via SP for passing to `g`
-; CHECK: ldr r1, [sp, #28]
-; CHECK-NEXT: ldr r2, [sp, #24]
-; CHECK-NEXT: ldr r3, [sp, #20]
+; CHECK-DAG: ldr r1, [sp, #28]
+; CHECK-DAG: ldr r2, [sp, #24]
+; CHECK-DAG: ldr r3, [sp, #20]
 ; CHECK: bl g
 
 ; VLAs, access via BP.
diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
--- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -209,17 +209,12 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: fldt (%eax)
-; CHECK-NEXT: flds LCPI10_0
-; CHECK-NEXT: fmul %st, %st(1)
-; CHECK-NEXT: flds LCPI10_1
-; CHECK-NEXT: fmul %st, %st(2)
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fmuls LCPI10_0
+; CHECK-NEXT: fmuls LCPI10_1
+; CHECK-NEXT: fld %st(0)
 ; CHECK-NEXT: ## InlineAsm Start
 ; CHECK-NEXT: fistpl %st
 ; CHECK-NEXT: ## InlineAsm End
-; CHECK-NEXT: fldt (%eax)
-; CHECK-NEXT: fmulp %st, %st(1)
-; CHECK-NEXT: fmulp %st, %st(1)
 ; CHECK-NEXT: ## InlineAsm Start
 ; CHECK-NEXT: fistpl %st
 ; CHECK-NEXT: ## InlineAsm End
diff --git a/llvm/test/CodeGen/X86/pr9517.ll b/llvm/test/CodeGen/X86/pr9517.ll
--- a/llvm/test/CodeGen/X86/pr9517.ll
+++ b/llvm/test/CodeGen/X86/pr9517.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
 ; CHECK-NEXT: incl %eax
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: retq
@@ -22,7 +21,7 @@
   ret i16 %v
 }
 
-; The asm call prevents the merging the loads here.
+; The asm call prevents the merging of the loads here.
 define i16 @unify_through_trival_asm_w_memory_clobber() {
 ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber:
 ; CHECK: # %bb.0:
@@ -47,61 +46,37 @@
 ; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
 ; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $6, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $7, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $8, %al
 ; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP