diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19375,6 +19375,23 @@
   return true;
 }
 
+static bool hasGluedInputChain(SDNode *N) {
+  auto LastOpNo = N->getNumOperands() - 1;
+  bool hasChain = N->getOperand(0).getValueType() == MVT::Other;
+  bool hasInputGlue = N->getOperand(LastOpNo).getValueType() == MVT::Glue;
+  return hasChain && hasInputGlue &&
+         (N->getOperand(0).getNode() == N->getOperand(LastOpNo).getNode());
+}
+
+static bool hasGluedOutput(SDNode *N) {
+  if (N->getNumValues() <= 1)
+    return false;
+  auto LastValNo = N->getNumValues() - 1;
+  bool hasChainOut = N->getValueType(LastValNo - 1) == MVT::Other;
+  bool hasGlueOut = N->getValueType(LastValNo) == MVT::Glue;
+  return hasChainOut && hasGlueOut;
+}
+
 void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
                                    SmallVectorImpl<SDValue> &Aliases) {
   SmallVector<SDValue, 8> Chains; // List of chains to visit.
@@ -19408,10 +19425,41 @@
       return false;
     }
 
-    case ISD::CopyFromReg:
-      // Always forward past CopyFromReg.
+    case ISD::CopyFromReg: {
+      // If this is glued, recursively check the chain to see if we can skip
+      // the full glued chain.
+      if (hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      // An unglued node can always be skipped.
       C = C.getOperand(0);
       return true;
+    }
+
+    case ISD::CopyToReg: {
+      if (hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      // If this has a glued output, return true so that recursive calls of
+      // this succeed.
+      return hasGluedOutput(C.getNode());
+    }
+
+    case ISD::INLINEASM: {
+      auto *ExtraInfo =
+          cast<ConstantSDNode>(C->getOperand(InlineAsm::Op_ExtraInfo));
+      unsigned EIInt = ExtraInfo->getZExtValue();
+      bool IsSafe = !(EIInt & InlineAsm::Extra_MayStore);
+      if (!IsLoad)
+        IsSafe = IsSafe && !(EIInt & InlineAsm::Extra_MayLoad);
+      if (IsSafe && hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      return false;
+    }
 
     default:
       return false;
    }
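Note on the new helpers: when inline asm has register inputs or outputs, instruction selection pins the matching CopyToReg/CopyFromReg nodes to the INLINEASM node with glue, and GatherAllAliases previously treated the whole glued sequence as an opaque memory barrier. The two predicates let ImproveChain recognize such a sequence and keep walking the chain past it when the asm cannot touch memory. A minimal IR sketch of the shape that exercises the new path; the function and names below are made up, not taken from the patch or its tests:

    define i32 @sketch(i32* %p, i32* %q, i32 %x) {
      ; The "r" input below is lowered to a CopyToReg glued into the
      ; INLINEASM node, i.e. the pattern hasGluedInputChain recognizes.
      %v = load i32, i32* %p
      call void asm sideeffect "nop", "r"(i32 %x)
      ; With no memory clobber on the asm, the combiner can now look
      ; through it and disambiguate this store against the load above.
      store i32 %v, i32* %q
      ret i32 %v
    }
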
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -122,12 +122,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -237,12 +237,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -359,12 +359,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -474,12 +474,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -589,12 +589,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -711,12 +711,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -825,12 +825,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -939,12 +939,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -1060,12 +1060,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
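The Mips updates above do not change what the tests verify; these va_arg tests only used the empty-constraint asm as an ordering fence between steps. With the combiner now able to look through inline asm that declares no memory effects, an empty constraint string no longer guarantees that ordering, so the tests gain a "~{memory}" clobber, which (to my understanding) sets the MayLoad/MayStore extra-info bits that the new ISD::INLINEASM case refuses to skip. A minimal sketch of the difference, using a hypothetical function rather than the test itself:

    define void @fence(i32* %p) {
      %v = load i32, i32* %p
      ; With constraints "" the asm declares no memory effects and may no
      ; longer order the surrounding accesses; "~{memory}" keeps it a barrier.
      call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
      store i32 %v, i32* %p
      ret void
    }
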
""() + call void asm sideeffect "teqi $$zero, 1", "~{memory}"() %arg1 = va_arg i8** %ap, i64 %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1 store volatile i64 %arg1, i64* %e1, align 8 - call void asm sideeffect "teqi $$zero, 2", ""() + call void asm sideeffect "teqi $$zero, 2", "~{memory}"() %arg2 = va_arg i8** %ap, i64 %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2 store volatile i64 %arg2, i64* %e2, align 8 diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -280,9 +280,9 @@ ; CHECK-NEXT: add r2, sp, #12 ; CHECK-NEXT: bl h ; Load `x`, `y`, and `z` via SP -; CHECK: ldr r1, [sp, #20] -; CHECK-NEXT: ldr r2, [sp, #16] -; CHECK-NEXT: ldr r3, [sp, #12] +; CHECK-DAG: ldr r1, [sp, #20] +; CHECK-DAG: ldr r2, [sp, #16] +; CHECK-DAG: ldr r3, [sp, #12] ; CHECK: bl g ; Re-aligned stack, access via SP. @@ -324,9 +324,9 @@ ; CHECK-NEXT: add r2, sp, #20 ; CHECK-NEXT: bl h ; Load `x`, `y`, and `z` via SP for passing to `g` -; CHECK: ldr r1, [sp, #28] -; CHECK-NEXT: ldr r2, [sp, #24] -; CHECK-NEXT: ldr r3, [sp, #20] +; CHECK-DAG: ldr r1, [sp, #28] +; CHECK-DAG: ldr r2, [sp, #24] +; CHECK-DAG: ldr r3, [sp, #20] ; CHECK: bl g ; VLAs, access via BP. diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll --- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll @@ -209,17 +209,12 @@ ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: fldt (%eax) -; CHECK-NEXT: flds LCPI10_0 -; CHECK-NEXT: fmul %st, %st(1) -; CHECK-NEXT: flds LCPI10_1 -; CHECK-NEXT: fmul %st, %st(2) -; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fmuls LCPI10_0 +; CHECK-NEXT: fmuls LCPI10_1 +; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: fldt (%eax) -; CHECK-NEXT: fmulp %st, %st(1) -; CHECK-NEXT: fmulp %st, %st(1) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistpl %st ; CHECK-NEXT: ## InlineAsm End diff --git a/llvm/test/CodeGen/X86/pr9517.ll b/llvm/test/CodeGen/X86/pr9517.ll --- a/llvm/test/CodeGen/X86/pr9517.ll +++ b/llvm/test/CodeGen/X86/pr9517.ll @@ -11,7 +11,6 @@ ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{.*}}(%rip), %eax ; CHECK-NEXT: incl %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq @@ -22,7 +21,7 @@ ret i16 %v } -; The asm call prevents the merging the loads here. +; The asm call prevents the merging the loads here. 
diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
--- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -209,17 +209,12 @@
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    fldt (%eax)
-; CHECK-NEXT:    flds LCPI10_0
-; CHECK-NEXT:    fmul %st, %st(1)
-; CHECK-NEXT:    flds LCPI10_1
-; CHECK-NEXT:    fmul %st, %st(2)
-; CHECK-NEXT:    fxch %st(2)
+; CHECK-NEXT:    fmuls LCPI10_0
+; CHECK-NEXT:    fmuls LCPI10_1
+; CHECK-NEXT:    fld %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
-; CHECK-NEXT:    fldt (%eax)
-; CHECK-NEXT:    fmulp %st, %st(1)
-; CHECK-NEXT:    fmulp %st, %st(1)
 ; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
diff --git a/llvm/test/CodeGen/X86/pr9517.ll b/llvm/test/CodeGen/X86/pr9517.ll
--- a/llvm/test/CodeGen/X86/pr9517.ll
+++ b/llvm/test/CodeGen/X86/pr9517.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax
 ; CHECK-NEXT:    incl %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
@@ -22,7 +21,7 @@
   ret i16 %v
 }
 
-; The asm call prevents the merging the loads here.
+; The asm call prevents merging the loads here.
 define i16 @unify_through_trival_asm_w_memory_clobber() {
 ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber:
 ; CHECK:       # %bb.0:
@@ -47,61 +46,37 @@
 ; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
 ; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $2, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $3, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $4, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $5, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $6, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $7, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $8, %al
 ; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
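The pr9517 deletions show the intended payoff: each outb block used to reload the port number from the global and recompute it, because the glued asm sequences hid the accesses from alias analysis; now a single movzwl/addl pair feeds all the asm blocks through %dx. A reduced sketch of the IR shape involved; the global name and constraints here are illustrative, not copied from the test:

    @port = external global i16

    define void @out2() {
      %p1 = load i16, i16* @port
      %v1 = add i16 %p1, 16
      call void asm sideeffect "outb %al, %dx", "{dx},{al}"(i16 %v1, i8 0)
      ; The asm declares no memory clobber, so this reload of @port can now
      ; be folded into the first load across the glued call sequence.
      %p2 = load i16, i16* @port
      %v2 = add i16 %p2, 16
      call void asm sideeffect "outb %al, %dx", "{dx},{al}"(i16 %v2, i8 1)
      ret void
    }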