Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -67,6 +67,11 @@ " function")); #endif + static cl::opt<unsigned> + MaxParallelChains("combiner-max-tf-size", cl::Hidden, cl::init(64), + cl::desc("Maximum number of parallel chains in " + "TokenFactor to allow for inlining")); + /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. static cl::opt<bool> @@ -1590,7 +1595,12 @@ break; case ISD::TokenFactor: - if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { + // Inlining multi-use token factors can cause quadratic growth + // in node size. Place a bound on the maximum worst-case TF + // size when inlining. + if ((Op.hasOneUse() || + Op.getNumOperands() + Ops.size() <= MaxParallelChains) && + !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. 
Index: test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi_align.ll +++ test/CodeGen/AArch64/arm64-abi_align.ll @@ -280,10 +280,10 @@ define i32 @caller42() #3 { entry: ; CHECK-LABEL: caller42 -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -318,10 +318,10 @@ ; CHECK-LABEL: caller42_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{x[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 @@ -388,10 +388,10 @@ define i32 @caller43() #3 { entry: ; CHECK-LABEL: caller43 -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{q[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -430,10 +430,10 @@ ; CHECK-LABEL: caller43_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{q[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16] +; CHECK-DAG: 
stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 Index: test/CodeGen/AArch64/arm64-variadic-aapcs.ll =================================================================== --- test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -99,10 +99,10 @@ ; __stack field should point just past them. define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) { ; CHECK-LABEL: test_offsetstack: -; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! -; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96 -; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var -; CHECK: str [[STACK_TOP]], [x[[VAR]]] +; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! +; CHECK-DAG: add [[STACK_TOP:x[0-9]+]], sp, #96 +; CHECK-DAG: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var +; CHECK-DAG: str [[STACK_TOP]], [x[[VAR]]] %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) Index: test/CodeGen/BPF/undef.ll =================================================================== --- test/CodeGen/BPF/undef.ll +++ test/CodeGen/BPF/undef.ll @@ -31,18 +31,18 @@ ; CHECK: *(u8 *)(r10 - 4) = r2 ; CHECK: r2 = 10 ; CHECK: *(u8 *)(r10 - 3) = r2 -; CHECK: *(u16 *)(r10 + 24) = r1 -; CHECK: *(u16 *)(r10 + 22) = r1 -; CHECK: *(u16 *)(r10 + 20) = r1 -; CHECK: *(u16 *)(r10 + 18) = r1 -; CHECK: *(u16 *)(r10 + 16) = r1 -; CHECK: *(u16 *)(r10 + 14) = r1 -; CHECK: *(u16 *)(r10 + 12) = r1 -; CHECK: *(u16 *)(r10 + 10) = r1 -; CHECK: *(u16 *)(r10 + 8) = r1 -; CHECK: *(u16 *)(r10 + 6) = r1 -; CHECK: *(u16 *)(r10 - 2) = r1 -; CHECK: *(u16 *)(r10 + 26) = r1 +; CHECK-DAG: *(u16 *)(r10 + 24) = r1 +; CHECK-DAG: *(u16 *)(r10 + 22) = r1 +; CHECK-DAG: *(u16 *)(r10 + 20) = r1 +; CHECK-DAG: *(u16 *)(r10 + 18) = r1 +; CHECK-DAG: *(u16 *)(r10 + 16) = r1 +; CHECK-DAG: *(u16 *)(r10 + 14) = r1 +; CHECK-DAG: 
*(u16 *)(r10 + 12) = r1 +; CHECK-DAG: *(u16 *)(r10 + 10) = r1 +; CHECK-DAG: *(u16 *)(r10 + 8) = r1 +; CHECK-DAG: *(u16 *)(r10 + 6) = r1 +; CHECK-DAG: *(u16 *)(r10 - 2) = r1 +; CHECK-DAG: *(u16 *)(r10 + 26) = r1 ; CHECK: r2 = r10 ; CHECK: r2 += -8 ; CHECK: r1 = ll Index: test/CodeGen/MSP430/Inst16mm.ll =================================================================== --- test/CodeGen/MSP430/Inst16mm.ll +++ test/CodeGen/MSP430/Inst16mm.ll @@ -64,6 +64,6 @@ %0 = load i16, i16* %retval ; [#uses=1] ret i16 %0 ; CHECK-LABEL: mov2: -; CHECK: mov.w 2(r1), 6(r1) -; CHECK: mov.w 0(r1), 4(r1) +; CHECK-DAG: mov.w 2(r1), 6(r1) +; CHECK-DAG: mov.w 0(r1), 4(r1) } Index: test/CodeGen/PowerPC/complex-return.ll =================================================================== --- test/CodeGen/PowerPC/complex-return.ll +++ test/CodeGen/PowerPC/complex-return.ll @@ -9,8 +9,8 @@ %x = alloca { ppc_fp128, ppc_fp128 }, align 16 %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 - store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real - store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag + store ppc_fp128 0xM400C0000000033300000000888800001, ppc_fp128* %real + store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE149A, ppc_fp128* %imag %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 %x.real = load ppc_fp128, ppc_fp128* %x.realp %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 Index: test/CodeGen/PowerPC/structsinmem.ll =================================================================== --- test/CodeGen/PowerPC/structsinmem.ll +++ test/CodeGen/PowerPC/structsinmem.ll @@ -148,18 +148,18 @@ %call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* 
byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) ret i32 %call -; CHECK: stb {{[0-9]+}}, 119(1) -; CHECK: sth {{[0-9]+}}, 126(1) -; CHECK: stb {{[0-9]+}}, 135(1) -; CHECK: sth {{[0-9]+}}, 133(1) -; CHECK: stw {{[0-9]+}}, 140(1) -; CHECK: stb {{[0-9]+}}, 151(1) -; CHECK: stw {{[0-9]+}}, 147(1) -; CHECK: sth {{[0-9]+}}, 158(1) -; CHECK: stw {{[0-9]+}}, 154(1) -; CHECK: stb {{[0-9]+}}, 167(1) -; CHECK: sth {{[0-9]+}}, 165(1) -; CHECK: stw {{[0-9]+}}, 161(1) +; CHECK-DAG: stb {{[0-9]+}}, 119(1) +; CHECK-DAG: sth {{[0-9]+}}, 126(1) +; CHECK-DAG: stb {{[0-9]+}}, 135(1) +; CHECK-DAG: sth {{[0-9]+}}, 133(1) +; CHECK-DAG: stw {{[0-9]+}}, 140(1) +; CHECK-DAG: stb {{[0-9]+}}, 151(1) +; CHECK-DAG: stw {{[0-9]+}}, 147(1) +; CHECK-DAG: sth {{[0-9]+}}, 158(1) +; CHECK-DAG: stw {{[0-9]+}}, 154(1) +; CHECK-DAG: stb {{[0-9]+}}, 167(1) +; CHECK-DAG: sth {{[0-9]+}}, 165(1) +; CHECK-DAG: stw {{[0-9]+}}, 161(1) } define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { Index: test/CodeGen/PowerPC/structsinregs.ll =================================================================== --- test/CodeGen/PowerPC/structsinregs.ll +++ test/CodeGen/PowerPC/structsinregs.ll @@ -60,13 +60,13 @@ ret i32 %call ; CHECK-LABEL: caller1 -; CHECK: ld 9, 112(31) -; CHECK: ld 8, 120(31) -; CHECK: ld 7, 128(31) -; CHECK: lwz 6, 136(31) -; CHECK: lwz 5, 144(31) -; CHECK: lhz 4, 152(31) -; CHECK: lbz 3, 160(31) +; CHECK-DAG: ld 9, 112(31) +; CHECK-DAG: ld 8, 120(31) +; CHECK-DAG: ld 7, 128(31) +; CHECK-DAG: lwz 6, 136(31) +; CHECK-DAG: lwz 5, 144(31) +; CHECK-DAG: lhz 4, 152(31) +; CHECK-DAG: lbz 3, 160(31) } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -142,22 +142,22 @@ 
ret i32 %call ; CHECK-LABEL: caller2 -; CHECK: stb {{[0-9]+}}, 71(1) -; CHECK: sth {{[0-9]+}}, 69(1) -; CHECK: stb {{[0-9]+}}, 87(1) -; CHECK: stw {{[0-9]+}}, 83(1) -; CHECK: sth {{[0-9]+}}, 94(1) -; CHECK: stw {{[0-9]+}}, 90(1) -; CHECK: stb {{[0-9]+}}, 103(1) -; CHECK: sth {{[0-9]+}}, 101(1) -; CHECK: stw {{[0-9]+}}, 97(1) -; CHECK: ld 9, 96(1) -; CHECK: ld 8, 88(1) -; CHECK: ld 7, 80(1) -; CHECK: lwz 6, 136(31) -; CHECK: ld 5, 64(1) -; CHECK: lhz 4, 152(31) -; CHECK: lbz 3, 160(31) +; CHECK-DAG: stb {{[0-9]+}}, 71(1) +; CHECK-DAG: sth {{[0-9]+}}, 69(1) +; CHECK-DAG: stb {{[0-9]+}}, 87(1) +; CHECK-DAG: stw {{[0-9]+}}, 83(1) +; CHECK-DAG: sth {{[0-9]+}}, 94(1) +; CHECK-DAG: stw {{[0-9]+}}, 90(1) +; CHECK-DAG: stb {{[0-9]+}}, 103(1) +; CHECK-DAG: sth {{[0-9]+}}, 101(1) +; CHECK-DAG: stw {{[0-9]+}}, 97(1) +; CHECK-DAG: ld 9, 96(1) +; CHECK-DAG: ld 8, 88(1) +; CHECK-DAG: ld 7, 80(1) +; CHECK-DAG: lwz 6, 136(31) +; CHECK-DAG: ld 5, 64(1) +; CHECK-DAG: lhz 4, 152(31) +; CHECK-DAG: lbz 3, 160(31) } define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -167,7 +167,7 @@ ret void ; X32-LABEL: test12: ; X32: addl (% -; X32-NEXT: adcl $0, +; X32: adcl $0, ; X64-LABEL: test12: ; X64: subq $-2147483648, (% } @@ -180,7 +180,7 @@ ; X32-LABEL: test13: ; X32: addl (% -; X32-NEXT: adcl $0, +; X32: adcl $0, ; X64-LABEL: test13: ; X64: subq $-128, (% } Index: test/CodeGen/X86/musttail.ll =================================================================== --- test/CodeGen/X86/musttail.ll +++ test/CodeGen/X86/musttail.ll @@ -46,8 +46,8 @@ ; CHECK-LABEL: t4: ; CHECK: incl %[[r:.*]] ; CHECK: decl %[[n:.*]] -; CHECK: movl %[[r]], {{[0-9]+}}(%esp) -; CHECK: movl %[[n]], {{[0-9]+}}(%esp) +; 
CHECK-DAG: movl %[[r]], {{[0-9]+}}(%esp) +; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%esp) ; CHECK: jmpl *%{{.*}} entry: @@ -71,8 +71,8 @@ ; CHECK: incl %[[r:.*]] ; CHECK: decl %[[n:.*]] ; Store them through ebp, since that's the only stable arg pointer. -; CHECK: movl %[[r]], {{[0-9]+}}(%ebp) -; CHECK: movl %[[n]], {{[0-9]+}}(%ebp) +; CHECK-DAG: movl %[[r]], {{[0-9]+}}(%ebp) +; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%ebp) ; Epilogue. ; CHECK: leal {{[-0-9]+}}(%ebp), %esp ; CHECK: popl %esi Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -546,7 +546,7 @@ ; 32-LABEL: rotr1_64_mem: ; 32: # BB#0: ; 32-NEXT: pushl %esi -; 32-NEXT: movl 8(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: movl (%eax), %ecx ; 32-NEXT: movl 4(%eax), %edx ; 32-NEXT: movl %edx, %esi @@ -555,11 +555,13 @@ ; 32-NEXT: movl %ecx, 4(%eax) ; 32-NEXT: movl %esi, (%eax) ; 32-NEXT: popl %esi - +; 32-NEXT: retl +; ; 64-LABEL: rotr1_64_mem: ; 64: # BB#0: ; 64-NEXT: rorq (%rdi) ; 64-NEXT: retq + %A = load i64, i64 *%Aptr %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -571,7 +573,7 @@ define void @rotr1_32_mem(i32* %Aptr) nounwind { ; 32-LABEL: rotr1_32_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: rorl (%eax) ; 32-NEXT: retl ; @@ -590,7 +592,7 @@ define void @rotr1_16_mem(i16* %Aptr) nounwind { ; 32-LABEL: rotr1_16_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: rorw (%eax) ; 32-NEXT: retl ; @@ -609,7 +611,7 @@ define void @rotr1_8_mem(i8* %Aptr) nounwind { ; 32-LABEL: rotr1_8_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: rorb (%eax) ; 32-NEXT: retl ;