Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18789,21 +18789,28 @@
 /// looking for aliasing nodes and adding them to the Aliases vector.
 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                    SmallVectorImpl<SDValue> &Aliases) {
-  SmallVector<SDValue, 8> Chains;    // List of chains to visit.
-  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+  SmallVector<std::pair<SDValue, SDValue>, 16>
+      Chains;                        // List of chains to visit.
+  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
 
   // Get alias information for node.
   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
 
   // Starting off.
-  Chains.push_back(OriginalChain);
+  Chains.push_back(std::make_pair(OriginalChain, OriginalChain));
   unsigned Depth = 0;
 
   // Look at each chain and determine if it is an alias. If so, add it to the
   // aliases list. If not, then continue up the chain looking for the next
   // candidate.
-  while (!Chains.empty()) {
-    SDValue Chain = Chains.pop_back_val();
+  for (unsigned ChainIdx = 0; ChainIdx < Chains.size(); ++ChainIdx) {
+    auto Pair = Chains[ChainIdx];
+    SDValue Chain = Pair.first;
+    SDValue Alias = Pair.second;
+
+    // Don't bother if we've been before.
+    if (!Visited.insert(Chain.getNode()).second)
+      continue;
 
     // For TokenFactor nodes, look at each operand and only continue up the
     // chain until we reach the depth limit.
@@ -18817,10 +18824,6 @@
       return;
     }
 
-    // Don't bother if we've been before.
-    if (!Visited.insert(Chain.getNode()).second)
-      continue;
-
     switch (Chain.getOpcode()) {
     case ISD::EntryToken:
       // Entry token is ideal chain operand, but handled in FindBetterChain.
       break;
@@ -18835,10 +18838,12 @@
       // If chain is alias then stop here.
       if (!(IsLoad && IsOpLoad) &&
           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
-        Aliases.push_back(Chain);
+        Visited.insert(Chain->getOperand(0).getNode());
+        Aliases.push_back(Alias);
       } else {
         // Look further up the chain.
-        Chains.push_back(Chain.getOperand(0));
+        Chains.push_back(
+            std::make_pair(Chain.getOperand(0), Chain.getOperand(0)));
         ++Depth;
       }
       break;
@@ -18847,26 +18852,75 @@
     case ISD::TokenFactor:
       // We have to check each of the operands of the token factor for "small"
       // token factors, so we queue them up. Adding the operands to the queue
-      // (stack) in reverse order maintains the original order and increases the
+      // (worklist) in order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
       if (Chain.getNumOperands() > 16) {
-        Aliases.push_back(Chain);
+        for (const SDValue &C : Chain->op_values())
+          Visited.insert(C.getNode());
+        Aliases.push_back(Alias);
         break;
       }
-      for (unsigned n = Chain.getNumOperands(); n;)
-        Chains.push_back(Chain.getOperand(--n));
+      for (unsigned n = 0; n < Chain.getNumOperands(); ++n) {
+        auto NewChain = Chain.getOperand(n);
+        Chains.push_back(std::make_pair(NewChain, NewChain));
+      }
       ++Depth;
       break;
 
-    case ISD::CopyFromReg:
+    case ISD::CopyFromReg: {
       // Forward past CopyFromReg.
-      Chains.push_back(Chain.getOperand(0));
-      ++Depth;
+      auto LastOpNo = Chain->getNumOperands() - 1;
+      bool isGlued = Chain->getOperand(LastOpNo).getValueType() == MVT::Glue &&
+                     Chain->getOperand(0).getNode() ==
+                         Chain->getOperand(LastOpNo).getNode();
+
+      auto NewAlias = (isGlued) ? Alias : Chain.getOperand(0);
+      Chains.push_back(std::make_pair(Chain.getOperand(0), NewAlias));
+      break;
+    }
+
+    case ISD::CopyToReg: {
+      // Forward past CopyToReg.
+      auto LastOpNo = Chain->getNumOperands() - 1;
+      // Input Glue
+      bool hasGlueInput =
+          Chain->getOperand(LastOpNo).getValueType() == MVT::Glue &&
+          Chain->getOperand(0).getNode() ==
+              Chain->getOperand(LastOpNo).getNode();
+      // Stop at an unglued CopyToReg unless we have already looked past glue.
+      if (hasGlueInput || Chain != Alias)
+        Chains.push_back(std::make_pair(Chain.getOperand(0), Alias));
+      else { // Don't go past unglued CopyToRegs
+        Visited.insert(Chain->getOperand(0).getNode());
+        Aliases.push_back(Alias);
+      }
       break;
+    }
+
+    case ISD::INLINEASM: {
+      auto LastOpNo = Chain->getNumOperands() - 1;
+      bool isGlued = Chain->getOperand(LastOpNo).getValueType() == MVT::Glue &&
+                     Chain->getOperand(0).getNode() ==
+                         Chain->getOperand(LastOpNo).getNode();
+      auto *ExtraInfo =
+          cast<ConstantSDNode>(Chain->getOperand(InlineAsm::Op_ExtraInfo));
+      unsigned EIInt = ExtraInfo->getZExtValue();
+      bool IsSafe = !(EIInt & InlineAsm::Extra_MayStore);
+      if (!IsLoad)
+        IsSafe = IsSafe && !(EIInt & InlineAsm::Extra_MayLoad);
+
+      auto NewAlias = (isGlued) ? Alias : Chain.getOperand(0);
+      if (IsSafe) {
+        Chains.push_back(std::make_pair(Chain.getOperand(0), NewAlias));
+        break;
+      }
+    }
+      LLVM_FALLTHROUGH;
 
     default:
       // For all other instructions we will just have to take what we can get.
-      Aliases.push_back(Chain);
+      Visited.insert(Chain->getOperand(0).getNode());
+      Aliases.push_back(Alias);
       break;
     }
   }
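The (Chain, Alias) bookkeeping above is easier to see in isolation. Below is a minimal standalone C++ sketch of the same idea on a toy node graph; ToyNode, Kind, and gatherAliases are hypothetical names, each node has a single chain operand, and every memory op is treated as aliasing. It models only the worklist mechanics, not the SelectionDAG API:

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

enum class Kind { Entry, MemOp, GluedCopy, UngluedCopy };

struct ToyNode {
  const char *Name;
  Kind K;
  ToyNode *ChainOp = nullptr; // single chain operand, for simplicity
};

// Each worklist entry is (chain node to inspect, node to report if an alias
// is found at or above it), mirroring the (Chain, Alias) pairs in the patch.
static void gatherAliases(ToyNode *Start, std::vector<ToyNode *> &Aliases) {
  std::vector<std::pair<ToyNode *, ToyNode *>> Worklist{{Start, Start}};
  std::set<ToyNode *> Visited;
  // Indexed loop because entries are appended while iterating, just like the
  // patch's for (ChainIdx ...) loop.
  for (size_t I = 0; I < Worklist.size(); ++I) {
    ToyNode *Chain = Worklist[I].first;
    ToyNode *Alias = Worklist[I].second;
    if (!Visited.insert(Chain).second)
      continue;
    switch (Chain->K) {
    case Kind::Entry:
      break; // ideal chain operand; nothing to record
    case Kind::MemOp:
      // Pretend every memory op aliases: record the forwarded node, which may
      // be a glued copy below the memory op rather than the op itself.
      Aliases.push_back(Alias);
      break;
    case Kind::GluedCopy:
      // Glued to its predecessor: an alias found above must be reported as
      // the node we entered with, so the glue pair stays intact.
      Worklist.push_back({Chain->ChainOp, Alias});
      break;
    case Kind::UngluedCopy:
      // No memory semantics and no glue: look straight through and report
      // whatever is found above directly.
      Worklist.push_back({Chain->ChainOp, Chain->ChainOp});
      break;
    }
  }
}

int main() {
  ToyNode Entry{"entry", Kind::Entry};
  ToyNode Store{"store", Kind::MemOp, &Entry};
  ToyNode Copy{"glued-copy", Kind::GluedCopy, &Store};
  std::vector<ToyNode *> Aliases;
  gatherAliases(&Copy, Aliases);
  for (ToyNode *A : Aliases)
    std::printf("alias dependence: %s\n", A->Name); // prints "glued-copy"
}

With the store hidden behind a glued copy, the recorded dependence is the copy itself, which is the behavior the CopyFromReg/CopyToReg cases above aim for; swapping GluedCopy for UngluedCopy makes the sketch report the store directly.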
Index: llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -9,10 +9,6 @@
 define void @test_simple(i32 %n, ...) {
 ; CHECK-LABEL: test_simple:
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
-
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -22,6 +18,10 @@
 ; ... omit middle ones ...
 ; CHECK: stp q6, q7, [sp, #
 
+; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
+
 ; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
@@ -45,10 +45,6 @@
 define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK-LABEL: test_fewargs:
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
-
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -58,6 +54,10 @@
 ; ... omit middle ones ...
 ; CHECK: str q7, [sp, #
 
+; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
+
 ; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
Index: llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
===================================================================
--- llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -122,12 +122,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -237,12 +237,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -359,12 +359,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -474,12 +474,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -589,12 +589,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -711,12 +711,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -825,12 +825,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -939,12 +939,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -1060,12 +1060,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
Index: llvm/test/CodeGen/SystemZ/pr36164.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/pr36164.ll
+++ llvm/test/CodeGen/SystemZ/pr36164.ll
@@ -22,9 +22,9 @@
 ; CHECK-NEXT: .cfi_offset %r15, -40
 ; CHECK-NEXT: lhi %r0, 1
 ; CHECK-NEXT: larl %r1, g_938
-; CHECK-NEXT: lhi %r2, 2
-; CHECK-NEXT: lhi %r3, 3
-; CHECK-NEXT: lhi %r4, 0
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lhi %r3, 2
+; CHECK-NEXT: lhi %r4, 3
 ; CHECK-NEXT: lhi %r5, 4
 ; CHECK-NEXT: larl %r14, g_11
 ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
@@ -44,21 +44,18 @@
 ; CHECK-NEXT: lrl %r13, g_832
 ; CHECK-NEXT: lrl %r13, g_832
 ; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: strl %r0, g_69
 ; CHECK-NEXT: lrl %r13, g_832
-; CHECK-DAG: lghi %r13, 24
-; CHECK-DAG: strl %r2, g_69
-; CHECK-DAG: ag %r13, 0(%r1)
+; CHECK-NEXT: lrl %r13, g_832
+; CHECK-NEXT: lrl %r13, g_832
+; CHECK-NEXT: strl %r2, g_69
+; CHECK-NEXT: lrl %r13, g_832
+; CHECK-NEXT: lghi %r13, 24
+; CHECK-NEXT: ag %r13, 0(%r1)
+; CHECK-NEXT: strl %r0, g_69
 ; CHECK-NEXT: lrl %r12, g_832
 ; CHECK-NEXT: strl %r3, g_69
 ; CHECK-NEXT: lrl %r12, g_832
 ; CHECK-NEXT: strl %r4, g_69
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r0, g_69
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r2, g_69
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r3, g_69
 ; CHECK-NEXT: stgrl %r13, g_938
 ; CHECK-NEXT: lrl %r13, g_832
 ; CHECK-NEXT: strl %r5, g_69
Index: llvm/test/CodeGen/Thumb/frame-access.ll
===================================================================
--- llvm/test/CodeGen/Thumb/frame-access.ll
+++ llvm/test/CodeGen/Thumb/frame-access.ll
@@ -280,9 +280,9 @@
 ; CHECK-NEXT: add r2, sp, #12
 ; CHECK-NEXT: bl h
 ; Load `x`, `y`, and `z` via SP
-; CHECK: ldr r1, [sp, #20]
-; CHECK-NEXT: ldr r2, [sp, #16]
-; CHECK-NEXT: ldr r3, [sp, #12]
+; CHECK-DAG: ldr r1, [sp, #20]
+; CHECK-DAG: ldr r2, [sp, #16]
+; CHECK-DAG: ldr r3, [sp, #12]
 ; CHECK: bl g
 
 ; Re-aligned stack, access via SP.
@@ -324,9 +324,9 @@
 ; CHECK-NEXT: add r2, sp, #20
 ; CHECK-NEXT: bl h
 ; Load `x`, `y`, and `z` via SP for passing to `g`
-; CHECK: ldr r1, [sp, #28]
-; CHECK-NEXT: ldr r2, [sp, #24]
-; CHECK-NEXT: ldr r3, [sp, #20]
+; CHECK-DAG: ldr r1, [sp, #28]
+; CHECK-DAG: ldr r2, [sp, #24]
+; CHECK-DAG: ldr r3, [sp, #20]
 ; CHECK: bl g
 
 ; VLAs, access via BP.
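Before the X86 FP-stack test churn below, the INLINEASM safety predicate from the DAGCombiner change is worth restating as a small truth table. This is a minimal sketch: MayLoad and MayStore are illustrative stand-ins for InlineAsm::Extra_MayLoad and InlineAsm::Extra_MayStore, not the real extra-info encoding.

#include <cassert>

// Stand-ins for InlineAsm::Extra_MayLoad / Extra_MayStore; the actual bit
// positions in the inline-asm extra-info operand differ.
constexpr unsigned MayLoad = 1u << 0;
constexpr unsigned MayStore = 1u << 1;

// IsLoad is true when the node whose aliases we gather is a non-volatile
// load; otherwise it is treated as a store for ordering purposes.
bool canLookThroughAsm(unsigned ExtraInfo, bool IsLoad) {
  bool IsSafe = !(ExtraInfo & MayStore); // a writing asm aliases any memop
  if (!IsLoad) // a store must also be ordered against asms that read
    IsSafe = IsSafe && !(ExtraInfo & MayLoad);
  return IsSafe;
}

int main() {
  assert(canLookThroughAsm(0, false));        // pure register asm: safe
  assert(canLookThroughAsm(MayLoad, true));   // load vs. reading asm: safe
  assert(!canLookThroughAsm(MayLoad, false)); // store vs. reading asm: unsafe
  assert(!canLookThroughAsm(MayStore, true)); // writing asm: never safe
}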
Index: llvm/test/CodeGen/X86/inline-asm-fpstack.ll
===================================================================
--- llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -92,7 +92,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 ; Uses the same value twice, should have one fstp after the asm.
 tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %A, double %A ) nounwind
@@ -187,9 +186,9 @@
 ; CHECK-NEXT: subl $28, %esp
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-NEXT: fldt {{[0-9]+}}(%esp)
-; CHECK-NEXT: fstpt {{[0-9]+}}(%esp) ## 10-byte Folded Spill
+; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
 ; CHECK-NEXT: calll _test1
-; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ## 10-byte Folded Reload
+; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
 ; CHECK-NEXT: ## InlineAsm Start
 ; CHECK-NEXT: fistpl %st(0)
 ; CHECK-NEXT: ## InlineAsm End
@@ -210,17 +209,12 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: fldt (%eax)
-; CHECK-NEXT: flds LCPI10_0
-; CHECK-NEXT: fmul %st(0), %st(1)
-; CHECK-NEXT: flds LCPI10_1
-; CHECK-NEXT: fmul %st(0), %st(2)
-; CHECK-NEXT: fxch %st(2)
+; CHECK-NEXT: fmuls LCPI10_0
+; CHECK-NEXT: fmuls LCPI10_1
+; CHECK-NEXT: fld %st(0)
 ; CHECK-NEXT: ## InlineAsm Start
 ; CHECK-NEXT: fistpl %st(0)
 ; CHECK-NEXT: ## InlineAsm End
-; CHECK-NEXT: fldt (%eax)
-; CHECK-NEXT: fmulp %st(1)
-; CHECK-NEXT: fmulp %st(1)
 ; CHECK-NEXT: ## InlineAsm Start
 ; CHECK-NEXT: fistpl %st(0)
 ; CHECK-NEXT: ## InlineAsm End
@@ -256,7 +250,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
 ret void
@@ -279,7 +272,6 @@
 ; CHECK-NEXT: fistl (%eax)
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
 ret x86_fp80 %0
@@ -301,7 +293,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
 ret void
@@ -328,7 +319,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
 ret void
@@ -344,7 +334,6 @@
 ; CHECK-NEXT: fucompp %st(1)
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
 ret void
@@ -361,7 +350,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(1)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
 %asmresult = extractvalue %complex %0, 0
@@ -378,7 +366,6 @@
 ; CHECK-NEXT: ## InlineAsm End
 ; CHECK-NEXT: fstp %st(1)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
 %asmresult = extractvalue %complex %0, 1
@@ -406,7 +393,6 @@
 ; CHECK-NEXT: fstp %st(1)
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
 %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
@@ -427,7 +413,6 @@
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: fstp %st(0)
 ; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
 entry:
 %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind
 ret i32 %0
Index: llvm/test/CodeGen/X86/oddshuffles.ll
===================================================================
--- llvm/test/CodeGen/X86/oddshuffles.ll
+++ llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1648,26 +1648,26 @@
 ; AVX2-FAST-NEXT: vmovups (%rsi), %ymm0
 ; AVX2-FAST-NEXT: vmovups (%rdx), %ymm1
 ; AVX2-FAST-NEXT: vmovups (%rcx), %ymm2
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [1,0,2,2,1,0,2,2]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [5,6,5,6,5,6,7,7]
 ; AVX2-FAST-NEXT: vpermps %ymm1, %ymm3, %ymm3
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,0,2,1]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7]
-; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm4
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[2,1,3,3]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2,3],ymm4[4],ymm3[5,6],ymm4[7]
+; AVX2-FAST-NEXT: vbroadcastsd 24(%rsi), %ymm4
 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
+; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm4
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,0,2,1]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
+; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm5
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
 ; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[1,1,2,2]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1],ymm4[2,3],ymm0[4],ymm4[5,6],ymm0[7]
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm4[2],ymm0[3,4],ymm4[5],ymm0[6,7]
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [5,6,5,6,5,6,7,7]
-; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[2,1,3,3]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6],ymm2[7]
-; AVX2-FAST-NEXT: vbroadcastsd 24(%rsi), %ymm2
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7]
-; AVX2-FAST-NEXT: vmovups %ymm1, 64(%rdi)
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3],ymm0[4],ymm2[5,6],ymm0[7]
+; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
 ; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX2-FAST-NEXT: vmovups %ymm3, (%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm4, (%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm3, 64(%rdi)
 ; AVX2-FAST-NEXT: vzeroupper
 ; AVX2-FAST-NEXT: retq
 ;
Index: llvm/test/CodeGen/X86/pr9517.ll
===================================================================
--- llvm/test/CodeGen/X86/pr9517.ll
+++ llvm/test/CodeGen/X86/pr9517.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
 ; CHECK-NEXT: incl %eax
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: retq
@@ -22,7 +21,7 @@
 ret i16 %v
 }
 
-; The asm call prevents the merging the loads here.
+; The asm call prevents merging the loads here.
 define i16 @unify_through_trival_asm_w_memory_clobber() {
 ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber:
 ; CHECK: # %bb.0:
@@ -47,61 +46,37 @@
 ; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
 ; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $2, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $5, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $6, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $7, %al
-; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: outb %al, %dx
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT: addl $16, %edx
 ; CHECK-NEXT: movb $8, %al
 ; CHECK-NEXT: # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT: #APP