Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -133,6 +133,10 @@ // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) return; + // Don't add deleted nodes. This can happen when a change + // to an operand causes redundancies. + if (N->getOpcode() == ISD::DELETED_NODE) + return; if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) Worklist.push_back(N); @@ -1590,7 +1594,8 @@ break; case ISD::TokenFactor: - if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { + // Inline TFs used multiple times if we're dealing with a small enough set of nodes + if ((Op.hasOneUse() || Ops.size() < 32) && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. Index: test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi_align.ll +++ test/CodeGen/AArch64/arm64-abi_align.ll @@ -280,10 +280,10 @@ define i32 @caller42() #3 { entry: ; CHECK-LABEL: caller42 -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -318,10 +318,10 @@ ; CHECK-LABEL: caller42_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{x[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; 
CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 @@ -388,10 +388,10 @@ define i32 @caller43() #3 { entry: ; CHECK-LABEL: caller43 -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{q[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -430,10 +430,10 @@ ; CHECK-LABEL: caller43_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{q[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 Index: test/CodeGen/AArch64/arm64-variadic-aapcs.ll =================================================================== --- test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -99,10 +99,10 @@ ; __stack field should point just past them. define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) { ; CHECK-LABEL: test_offsetstack: -; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! -; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96 -; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var -; CHECK: str [[STACK_TOP]], [x[[VAR]]] +; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! 
+; CHECK-DAG: add [[STACK_TOP:x[0-9]+]], sp, #96 +; CHECK-DAG: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var +; CHECK-DAG: str [[STACK_TOP]], [x[[VAR]]] %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -74,10 +74,10 @@ ; LE-NEXT: orr r2, r3, r2, lsl #16 ; LE-NEXT: ldr r3, .LCPI2_0 ; LE-NEXT: and r2, r2, r3 -; LE-NEXT: lsr r3, r2, #16 ; LE-NEXT: orr r1, r2, r1, lsl #13 -; LE-NEXT: strb r3, [r0, #2] ; LE-NEXT: strh r1, [r0] +; LE-NEXT: lsr r1, r2, #16 +; LE-NEXT: strb r1, [r0, #2] ; LE-NEXT: mov pc, lr ; LE-NEXT: .p2align 2 ; LE-NEXT: @ BB#1: Index: test/CodeGen/BPF/undef.ll =================================================================== --- test/CodeGen/BPF/undef.ll +++ test/CodeGen/BPF/undef.ll @@ -31,18 +31,18 @@ ; CHECK: *(u8 *)(r10 - 4) = r2 ; CHECK: r2 = 10 ; CHECK: *(u8 *)(r10 - 3) = r2 -; CHECK: *(u16 *)(r10 + 24) = r1 -; CHECK: *(u16 *)(r10 + 22) = r1 -; CHECK: *(u16 *)(r10 + 20) = r1 -; CHECK: *(u16 *)(r10 + 18) = r1 -; CHECK: *(u16 *)(r10 + 16) = r1 -; CHECK: *(u16 *)(r10 + 14) = r1 -; CHECK: *(u16 *)(r10 + 12) = r1 -; CHECK: *(u16 *)(r10 + 10) = r1 -; CHECK: *(u16 *)(r10 + 8) = r1 -; CHECK: *(u16 *)(r10 + 6) = r1 -; CHECK: *(u16 *)(r10 - 2) = r1 -; CHECK: *(u16 *)(r10 + 26) = r1 +; CHECK-DAG: *(u16 *)(r10 + 24) = r1 +; CHECK-DAG: *(u16 *)(r10 + 22) = r1 +; CHECK-DAG: *(u16 *)(r10 + 20) = r1 +; CHECK-DAG: *(u16 *)(r10 + 18) = r1 +; CHECK-DAG: *(u16 *)(r10 + 16) = r1 +; CHECK-DAG: *(u16 *)(r10 + 14) = r1 +; CHECK-DAG: *(u16 *)(r10 + 12) = r1 +; CHECK-DAG: *(u16 *)(r10 + 10) = r1 +; CHECK-DAG: *(u16 *)(r10 + 8) = r1 +; CHECK-DAG: *(u16 *)(r10 + 6) = r1 +; CHECK-DAG: *(u16 *)(r10 - 2) = r1 +; CHECK-DAG: *(u16 *)(r10 + 26) = r1 ; CHECK: r2 = r10 ; CHECK: r2 += -8 ; CHECK: r1 = ll Index: 
test/CodeGen/MSP430/Inst16mm.ll =================================================================== --- test/CodeGen/MSP430/Inst16mm.ll +++ test/CodeGen/MSP430/Inst16mm.ll @@ -64,6 +64,6 @@ %0 = load i16, i16* %retval ; [#uses=1] ret i16 %0 ; CHECK-LABEL: mov2: -; CHECK: mov.w 2(r1), 6(r1) -; CHECK: mov.w 0(r1), 4(r1) +; CHECK-DAG: mov.w 2(r1), 6(r1) +; CHECK-DAG: mov.w 0(r1), 4(r1) } Index: test/CodeGen/PowerPC/complex-return.ll =================================================================== --- test/CodeGen/PowerPC/complex-return.ll +++ test/CodeGen/PowerPC/complex-return.ll @@ -9,8 +9,8 @@ %x = alloca { ppc_fp128, ppc_fp128 }, align 16 %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 - store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real - store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag + store ppc_fp128 0xM400C0000000033300000000888800001, ppc_fp128* %real + store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE149A, ppc_fp128* %imag %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 %x.real = load ppc_fp128, ppc_fp128* %x.realp %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 Index: test/CodeGen/PowerPC/structsinmem.ll =================================================================== --- test/CodeGen/PowerPC/structsinmem.ll +++ test/CodeGen/PowerPC/structsinmem.ll @@ -148,18 +148,18 @@ %call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) ret i32 %call -; CHECK: stb {{[0-9]+}}, 119(1) -; CHECK: sth {{[0-9]+}}, 126(1) -; CHECK: stb {{[0-9]+}}, 135(1) -; CHECK: sth {{[0-9]+}}, 133(1) -; 
CHECK: stw {{[0-9]+}}, 140(1) -; CHECK: stb {{[0-9]+}}, 151(1) -; CHECK: stw {{[0-9]+}}, 147(1) -; CHECK: sth {{[0-9]+}}, 158(1) -; CHECK: stw {{[0-9]+}}, 154(1) -; CHECK: stb {{[0-9]+}}, 167(1) -; CHECK: sth {{[0-9]+}}, 165(1) -; CHECK: stw {{[0-9]+}}, 161(1) +; CHECK-DAG: stb {{[0-9]+}}, 119(1) +; CHECK-DAG: sth {{[0-9]+}}, 126(1) +; CHECK-DAG: stb {{[0-9]+}}, 135(1) +; CHECK-DAG: sth {{[0-9]+}}, 133(1) +; CHECK-DAG: stw {{[0-9]+}}, 140(1) +; CHECK-DAG: stb {{[0-9]+}}, 151(1) +; CHECK-DAG: stw {{[0-9]+}}, 147(1) +; CHECK-DAG: sth {{[0-9]+}}, 158(1) +; CHECK-DAG: stw {{[0-9]+}}, 154(1) +; CHECK-DAG: stb {{[0-9]+}}, 167(1) +; CHECK-DAG: sth {{[0-9]+}}, 165(1) +; CHECK-DAG: stw {{[0-9]+}}, 161(1) } define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { Index: test/CodeGen/PowerPC/structsinregs.ll =================================================================== --- test/CodeGen/PowerPC/structsinregs.ll +++ test/CodeGen/PowerPC/structsinregs.ll @@ -60,13 +60,13 @@ ret i32 %call ; CHECK-LABEL: caller1 -; CHECK: ld 9, 112(31) -; CHECK: ld 8, 120(31) -; CHECK: ld 7, 128(31) -; CHECK: lwz 6, 136(31) -; CHECK: lwz 5, 144(31) -; CHECK: lhz 4, 152(31) -; CHECK: lbz 3, 160(31) +; CHECK-DAG: ld 9, 112(31) +; CHECK-DAG: ld 8, 120(31) +; CHECK-DAG: ld 7, 128(31) +; CHECK-DAG: lwz 6, 136(31) +; CHECK-DAG: lwz 5, 144(31) +; CHECK-DAG: lhz 4, 152(31) +; CHECK-DAG: lbz 3, 160(31) } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -142,22 +142,22 @@ ret i32 %call ; CHECK-LABEL: caller2 -; CHECK: stb {{[0-9]+}}, 71(1) -; CHECK: sth {{[0-9]+}}, 69(1) -; CHECK: stb {{[0-9]+}}, 87(1) -; CHECK: stw {{[0-9]+}}, 83(1) -; CHECK: sth {{[0-9]+}}, 94(1) -; CHECK: stw {{[0-9]+}}, 90(1) -; CHECK: stb {{[0-9]+}}, 103(1) -; CHECK: sth 
{{[0-9]+}}, 101(1) -; CHECK: stw {{[0-9]+}}, 97(1) -; CHECK: ld 9, 96(1) -; CHECK: ld 8, 88(1) -; CHECK: ld 7, 80(1) -; CHECK: lwz 6, 136(31) -; CHECK: ld 5, 64(1) -; CHECK: lhz 4, 152(31) -; CHECK: lbz 3, 160(31) +; CHECK-DAG: stb {{[0-9]+}}, 71(1) +; CHECK-DAG: sth {{[0-9]+}}, 69(1) +; CHECK-DAG: stb {{[0-9]+}}, 87(1) +; CHECK-DAG: stw {{[0-9]+}}, 83(1) +; CHECK-DAG: sth {{[0-9]+}}, 94(1) +; CHECK-DAG: stw {{[0-9]+}}, 90(1) +; CHECK-DAG: stb {{[0-9]+}}, 103(1) +; CHECK-DAG: sth {{[0-9]+}}, 101(1) +; CHECK-DAG: stw {{[0-9]+}}, 97(1) +; CHECK-DAG: ld 9, 96(1) +; CHECK-DAG: ld 8, 88(1) +; CHECK-DAG: ld 7, 80(1) +; CHECK-DAG: lwz 6, 136(31) +; CHECK-DAG: ld 5, 64(1) +; CHECK-DAG: lhz 4, 152(31) +; CHECK-DAG: lbz 3, 160(31) } define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -167,7 +167,7 @@ ret void ; X32-LABEL: test12: ; X32: addl (% -; X32-NEXT: adcl $0, +; X32: adcl $0, ; X64-LABEL: test12: ; X64: subq $-2147483648, (% } @@ -180,7 +180,7 @@ ; X32-LABEL: test13: ; X32: addl (% -; X32-NEXT: adcl $0, +; X32: adcl $0, ; X64-LABEL: test13: ; X64: subq $-128, (% } Index: test/CodeGen/X86/musttail.ll =================================================================== --- test/CodeGen/X86/musttail.ll +++ test/CodeGen/X86/musttail.ll @@ -46,8 +46,8 @@ ; CHECK-LABEL: t4: ; CHECK: incl %[[r:.*]] ; CHECK: decl %[[n:.*]] -; CHECK: movl %[[r]], {{[0-9]+}}(%esp) -; CHECK: movl %[[n]], {{[0-9]+}}(%esp) +; CHECK-DAG: movl %[[r]], {{[0-9]+}}(%esp) +; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%esp) ; CHECK: jmpl *%{{.*}} entry: @@ -71,8 +71,8 @@ ; CHECK: incl %[[r:.*]] ; CHECK: decl %[[n:.*]] ; Store them through ebp, since that's the only stable arg pointer. 
-; CHECK: movl %[[r]], {{[0-9]+}}(%ebp) -; CHECK: movl %[[n]], {{[0-9]+}}(%ebp) +; CHECK-DAG: movl %[[r]], {{[0-9]+}}(%ebp) +; CHECK-DAG: movl %[[n]], {{[0-9]+}}(%ebp) ; Epilogue. ; CHECK: leal {{[-0-9]+}}(%ebp), %esp ; CHECK: popl %esi Index: test/CodeGen/X86/pr30284.ll =================================================================== --- test/CodeGen/X86/pr30284.ll +++ test/CodeGen/X86/pr30284.ll @@ -7,10 +7,10 @@ ; CHECK-NEXT: vmovapd 0, %zmm0 ; CHECK-NEXT: vmovapd 64, %zmm1 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16] -; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1} ; CHECK-NEXT: vorpd %zmm2, %zmm1, %zmm1 {%k1} -; CHECK-NEXT: vmovapd %zmm1, 64 +; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1} ; CHECK-NEXT: vmovapd %zmm0, 0 +; CHECK-NEXT: vmovapd %zmm1, 64 ; CHECK-NEXT: retl %a_load22 = load <16 x i64>, <16 x i64>* null, align 1 %bitop = or <16 x i64> %a_load22, Index: test/CodeGen/X86/pr32108.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr32108.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 %s -o - + +target triple = "x86_64-unknown-linux-gnu" + +define void @autogen_SD1794() { +BB: + %Cmp45 = icmp slt <4 x i32> undef, undef + br label %CF243 + +CF243: ; preds = %CF243, %BB + br i1 undef, label %CF243, label %CF257 + +CF257: ; preds = %CF243 + %Shuff144 = shufflevector <4 x i1> undef, <4 x i1> %Cmp45, <4 x i32> + br label %CF244 + +CF244: ; preds = %CF244, %CF257 + %Shuff182 = shufflevector <4 x i1> %Shuff144, <4 x i1> zeroinitializer, <4 x i32> + br label %CF244 +} Index: test/CodeGen/X86/psubus.ll =================================================================== --- test/CodeGen/X86/psubus.ll +++ test/CodeGen/X86/psubus.ll @@ -205,10 +205,10 @@ ; SSE-NEXT: movdqu (%rdi), %xmm0 ; SSE-NEXT: movdqu 16(%rdi), %xmm1 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE-NEXT: psubusw %xmm2, %xmm0 ; SSE-NEXT: psubusw %xmm2, %xmm1 -; 
SSE-NEXT: movdqu %xmm1, 16(%rdi) +; SSE-NEXT: psubusw %xmm2, %xmm0 ; SSE-NEXT: movdqu %xmm0, (%rdi) +; SSE-NEXT: movdqu %xmm1, 16(%rdi) ; SSE-NEXT: retq ; ; AVX1-LABEL: test7: @@ -249,10 +249,10 @@ ; SSE-NEXT: movdqu (%rdi), %xmm0 ; SSE-NEXT: movdqu 16(%rdi), %xmm1 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE-NEXT: psubusw %xmm2, %xmm0 ; SSE-NEXT: psubusw %xmm2, %xmm1 -; SSE-NEXT: movdqu %xmm1, 16(%rdi) +; SSE-NEXT: psubusw %xmm2, %xmm0 ; SSE-NEXT: movdqu %xmm0, (%rdi) +; SSE-NEXT: movdqu %xmm1, 16(%rdi) ; SSE-NEXT: retq ; ; AVX1-LABEL: test8: @@ -302,10 +302,10 @@ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE-NEXT: movdqu (%rdi), %xmm1 ; SSE-NEXT: movdqu 16(%rdi), %xmm2 -; SSE-NEXT: psubusw %xmm0, %xmm1 ; SSE-NEXT: psubusw %xmm0, %xmm2 -; SSE-NEXT: movdqu %xmm2, 16(%rdi) +; SSE-NEXT: psubusw %xmm0, %xmm1 ; SSE-NEXT: movdqu %xmm1, (%rdi) +; SSE-NEXT: movdqu %xmm2, 16(%rdi) ; SSE-NEXT: retq ; ; AVX1-LABEL: test9: @@ -356,10 +356,10 @@ ; SSE-NEXT: movdqu (%rdi), %xmm0 ; SSE-NEXT: movdqu 16(%rdi), %xmm1 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: psubusb %xmm2, %xmm0 ; SSE-NEXT: psubusb %xmm2, %xmm1 -; SSE-NEXT: movdqu %xmm1, 16(%rdi) +; SSE-NEXT: psubusb %xmm2, %xmm0 ; SSE-NEXT: movdqu %xmm0, (%rdi) +; SSE-NEXT: movdqu %xmm1, 16(%rdi) ; SSE-NEXT: retq ; ; AVX1-LABEL: test10: @@ -401,10 +401,10 @@ ; SSE-NEXT: movdqu (%rdi), %xmm0 ; SSE-NEXT: movdqu 16(%rdi), %xmm1 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE-NEXT: psubusb %xmm2, %xmm0 ; SSE-NEXT: psubusb %xmm2, %xmm1 -; SSE-NEXT: movdqu %xmm1, 16(%rdi) +; SSE-NEXT: psubusb %xmm2, %xmm0 ; SSE-NEXT: movdqu %xmm0, (%rdi) +; SSE-NEXT: movdqu %xmm1, 16(%rdi) ; SSE-NEXT: retq ; ; AVX1-LABEL: test11: @@ -454,10 +454,10 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-NEXT: movdqu (%rdi), %xmm1 ; SSE2-NEXT: movdqu 16(%rdi), %xmm2 
-; SSE2-NEXT: psubusb %xmm0, %xmm1 ; SSE2-NEXT: psubusb %xmm0, %xmm2 -; SSE2-NEXT: movdqu %xmm2, 16(%rdi) +; SSE2-NEXT: psubusb %xmm0, %xmm1 ; SSE2-NEXT: movdqu %xmm1, (%rdi) +; SSE2-NEXT: movdqu %xmm2, 16(%rdi) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test12: @@ -467,10 +467,10 @@ ; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: movdqu (%rdi), %xmm1 ; SSSE3-NEXT: movdqu 16(%rdi), %xmm2 -; SSSE3-NEXT: psubusb %xmm0, %xmm1 ; SSSE3-NEXT: psubusb %xmm0, %xmm2 -; SSSE3-NEXT: movdqu %xmm2, 16(%rdi) +; SSSE3-NEXT: psubusb %xmm0, %xmm1 ; SSSE3-NEXT: movdqu %xmm1, (%rdi) +; SSSE3-NEXT: movdqu %xmm2, 16(%rdi) ; SSSE3-NEXT: retq ; ; AVX1-LABEL: test12: Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -546,20 +546,22 @@ ; 32-LABEL: rotr1_64_mem: ; 32: # BB#0: ; 32-NEXT: pushl %esi -; 32-NEXT: movl 8(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: movl (%eax), %ecx ; 32-NEXT: movl 4(%eax), %edx ; 32-NEXT: movl %edx, %esi ; 32-NEXT: shldl $31, %ecx, %esi ; 32-NEXT: shldl $31, %edx, %ecx -; 32-NEXT: movl %ecx, 4(%eax) ; 32-NEXT: movl %esi, (%eax) +; 32-NEXT: movl %ecx, 4(%eax) ; 32-NEXT: popl %esi - +; 32-NEXT: retl +; ; 64-LABEL: rotr1_64_mem: ; 64: # BB#0: ; 64-NEXT: rorq (%rdi) ; 64-NEXT: retq + %A = load i64, i64 *%Aptr %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -571,7 +573,7 @@ define void @rotr1_32_mem(i32* %Aptr) nounwind { ; 32-LABEL: rotr1_32_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: rorl (%eax) ; 32-NEXT: retl ; @@ -590,7 +592,7 @@ define void @rotr1_16_mem(i16* %Aptr) nounwind { ; 32-LABEL: rotr1_16_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl {{[0-9]+}}(%esp), %eax ; 32-NEXT: rorw (%eax) ; 32-NEXT: retl ; @@ -609,7 +611,7 @@ define void @rotr1_8_mem(i8* %Aptr) nounwind { ; 32-LABEL: rotr1_8_mem: ; 32: # BB#0: -; 32-NEXT: movl 4(%esp), %eax +; 32-NEXT: movl 
{{[0-9]+}}(%esp), %eax ; 32-NEXT: rorb (%eax) ; 32-NEXT: retl ;