diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19375,26 +19375,106 @@ return true; } -/// Walk up chain skipping non-aliasing memory nodes, -/// looking for aliasing nodes and adding them to the Aliases vector. +static bool hasGluedInputChain(SDNode *N) { + auto LastOpNo = N->getNumOperands() - 1; + bool hasChain = N->getOperand(0).getValueType() == MVT::Other; + bool hasInputGlue = N->getOperand(LastOpNo).getValueType() == MVT::Glue; + return hasChain && hasInputGlue && + (N->getOperand(0).getNode() == N->getOperand(LastOpNo).getNode()); +} + +static bool hasGluedOutput(SDNode *N) { + if (N->getNumValues() <= 1) + return false; + auto LastValNo = N->getNumValues() - 1; + bool hasChainOut = N->getValueType(LastValNo - 1) == MVT::Other; + bool hasGlueOut = N->getValueType(LastValNo) == MVT::Glue; + return hasChainOut && hasGlueOut; +} + void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases) { SmallVector Chains; // List of chains to visit. SmallPtrSet Visited; // Visited node set. // Get alias information for node. - bool IsLoad = isa(N) && !N->isVolatile(); + const bool IsLoad = isa(N) && !N->isVolatile(); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; + // Attempt to improve chain by a single step + std::function ImproveChain = [&](SDValue &C) -> bool { + switch (C.getOpcode()) { + case ISD::EntryToken: + // No need to mark EntryToken. + C = SDValue(); + return true; + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for C. + auto LSChain = cast(C.getNode()); + bool IsOpLoad = isa(C.getNode()) && !LSChain->isVolatile(); + if ((IsLoad && IsOpLoad) || !isAlias(N, LSChain)) { + // Look further up the chain. + C = C.getOperand(0); + return true; + } + // Alias, so stop here. 
+ return false; + } + + case ISD::CopyFromReg: { + // If this is glued, recursively check chain to see if we can skip the + // full glued chain. + if (hasGluedInputChain(C.getNode())) { + C = C.getOperand(0); + return ImproveChain(C); + } + // Unglued node can always be skipped. + C = C.getOperand(0); + return true; + } + + case ISD::CopyToReg: { + if (hasGluedInputChain(C.getNode())) { + C = C.getOperand(0); + return ImproveChain(C); + } + // If this had a glued output, return C so recursive calls of this + // succeed. + return hasGluedOutput(C.getNode()); + } + + case ISD::INLINEASM: { + auto *ExtraInfo = + cast(C->getOperand(InlineAsm::Op_ExtraInfo)); + unsigned EIInt = ExtraInfo->getZExtValue(); + // Non-volatile loads only clash with asm stores; others also with loads. + bool IsSafe = !(EIInt & InlineAsm::Extra_MayStore) && + (IsLoad || !(EIInt & InlineAsm::Extra_MayLoad)); + if (IsSafe && hasGluedInputChain(C.getNode())) { + C = C.getOperand(0); + return ImproveChain(C); + } + return false; + } + default: + return false; + } + }; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.pop_back_val(); + // Don't bother if we've seen Chain before. + if (!Visited.insert(Chain.getNode()).second) + continue; + // For TokenFactor nodes, look at each operand and only continue up the // chain until we reach the depth limit. // @@ -19407,58 +19487,30 @@ return; } - // Don't bother if we've been before. - if (!Visited.insert(Chain.getNode()).second) - continue; - - switch (Chain.getOpcode()) { - case ISD::EntryToken: - // Entry token is ideal chain operand, but handled in FindBetterChain. - break; - - case ISD::LOAD: - case ISD::STORE: { - // Get alias information for Chain. - bool IsOpLoad = isa(Chain.getNode()) && - !cast(Chain.getNode())->isVolatile(); - - // If chain is alias then stop here. 
- if (!(IsLoad && IsOpLoad) && - isAlias(N, cast(Chain.getNode()))) { - Aliases.push_back(Chain); - } else { - // Look further up the chain. - Chains.push_back(Chain.getOperand(0)); - ++Depth; - } - break; - } - - case ISD::TokenFactor: + if (Chain.getOpcode() == ISD::TokenFactor) { // We have to check each of the operands of the token factor for "small" // token factors, so we queue them up. Adding the operands to the queue // (stack) in reverse order maintains the original order and increases the // likelihood that getNode will find a matching token factor (CSE.) if (Chain.getNumOperands() > 16) { Aliases.push_back(Chain); - break; + continue; } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); ++Depth; - break; - - case ISD::CopyFromReg: - // Forward past CopyFromReg. - Chains.push_back(Chain.getOperand(0)); + continue; + } + // Everything else + if (ImproveChain(Chain)) { + // Updated Chain Found, Consider new chain if one exists. + if (Chain.getNode()) + Chains.push_back(Chain); ++Depth; - break; - - default: - // For all other instructions we will just have to take what we can get. - Aliases.push_back(Chain); - break; + continue; } + // No Improved Chain Possible, treat as Alias. 
+ Aliases.push_back(Chain); } } diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll --- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll +++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll @@ -122,12 +122,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i16 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1 store volatile i16 %arg1, i16* %e1, align 2 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i16 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2 store volatile i16 %arg2, i16* %e2, align 2 @@ -237,12 +237,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i32 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1 store volatile i32 %arg1, i32* %e1, align 4 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i32 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2 store volatile i32 %arg2, i32* %e2, align 4 @@ -359,12 +359,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i64 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1 store volatile i64 %arg1, i64* %e1, align 8 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i64 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2 store volatile i64 %arg2, i64* %e2, align 8 @@ 
-474,12 +474,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i16 %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1 store volatile i16 %arg1, i16* %e1, align 2 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i16 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2 store volatile i16 %arg2, i16* %e2, align 2 @@ -589,12 +589,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i32 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1 store volatile i32 %arg1, i32* %e1, align 4 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i32 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2 store volatile i32 %arg2, i32* %e2, align 4 @@ -711,12 +711,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i64 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1 store volatile i64 %arg1, i64* %e1, align 8 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i64 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2 store volatile i64 %arg2, i64* %e2, align 8 @@ -825,12 +825,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i16 %e1 = getelementptr [3 x i16], [3 x 
i16]* @hwords, i32 0, i32 1 store volatile i16 %arg1, i16* %e1, align 2 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i16 %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2 store volatile i16 %arg2, i16* %e2, align 2 @@ -939,12 +939,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i32 %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1 store volatile i32 %arg1, i32* %e1, align 4 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i32 %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2 store volatile i32 %arg2, i32* %e2, align 4 @@ -1060,12 +1060,12 @@ %ap2 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap2) - call void asm sideeffect "teqi $$zero, 1", ""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i64 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1 store volatile i64 %arg1, i64* %e1, align 8 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i64 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2 store volatile i64 %arg2, i64* %e2, align 8 diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -280,9 +280,9 @@ ; CHECK-NEXT: add r2, sp, #12 ; CHECK-NEXT: bl h ; Load `x`, `y`, and `z` via SP -; CHECK: ldr r1, [sp, #20] -; CHECK-NEXT: ldr r2, [sp, #16] -; CHECK-NEXT: ldr r3, [sp, #12] +; CHECK-DAG: ldr r1, [sp, #20] +; CHECK-DAG: ldr r2, [sp, #16] +; CHECK-DAG: ldr r3, [sp, #12] ; CHECK: bl g ; Re-aligned stack, access 
via SP. @@ -324,9 +324,9 @@ ; CHECK-NEXT: add r2, sp, #20 ; CHECK-NEXT: bl h ; Load `x`, `y`, and `z` via SP for passing to `g` -; CHECK: ldr r1, [sp, #28] -; CHECK-NEXT: ldr r2, [sp, #24] -; CHECK-NEXT: ldr r3, [sp, #20] +; CHECK-DAG: ldr r1, [sp, #28] +; CHECK-DAG: ldr r2, [sp, #24] +; CHECK-DAG: ldr r3, [sp, #20] ; CHECK: bl g ; VLAs, access via BP. diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll --- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll @@ -209,17 +209,12 @@ ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: fldt (%eax) -; CHECK-NEXT: flds LCPI10_0 -; CHECK-NEXT: fmul %st, %st(1) -; CHECK-NEXT: flds LCPI10_1 -; CHECK-NEXT: fmul %st, %st(2) -; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fmuls LCPI10_0 +; CHECK-NEXT: fmuls LCPI10_1 +; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: fldt (%eax) -; CHECK-NEXT: fmulp %st, %st(1) -; CHECK-NEXT: fmulp %st, %st(1) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End diff --git a/llvm/test/CodeGen/X86/pr9517.ll b/llvm/test/CodeGen/X86/pr9517.ll --- a/llvm/test/CodeGen/X86/pr9517.ll +++ b/llvm/test/CodeGen/X86/pr9517.ll @@ -11,7 +11,6 @@ ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %eax ; CHECK-NEXT: incl %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq @@ -22,7 +21,7 @@ ret i16 %v } -; The asm call prevents the merging the loads here. +; The asm call prevents the merging the loads here. 
define i16 @unify_through_trival_asm_w_memory_clobber() { ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber: ; CHECK: # %bb.0: @@ -47,61 +46,37 @@ ; CHECK-NEXT: movzwl {{.*}}(%rip), %edx ; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP ; CHECK-NEXT: outb %al, %dx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %edx -; CHECK-NEXT: addl $16, %edx ; 
CHECK-NEXT: movb $8, %al ; CHECK-NEXT: # kill: def $dx killed $dx killed $edx ; CHECK-NEXT: #APP