Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8087,7 +8087,7 @@
 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
 if (DoXform) {
 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
 LN0->getChain(), LN0->getBasePtr(),
 N0.getValueType(), LN0->getMemOperand());
Index: test/CodeGen/AArch64/arm64-aapcs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-aapcs.ll
+++ test/CodeGen/AArch64/arm64-aapcs.ll
@@ -24,36 +24,35 @@
 @var64 = global i64 0, align 8
- ; Check stack slots are 64-bit at all times.
+; Check stack slots are 64-bit at all times.
 define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) {
+; CHECK-LABEL: test_stack_slots:
+; CHECK-DAG: ldr w[[ext1:[0-9]+]], [sp, #24]
+; CHECK-DAG: ldrh w[[ext2:[0-9]+]], [sp, #16]
+; CHECK-DAG: ldrb w[[ext3:[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr x[[ext4:[0-9]+]], [sp, #32]
+; CHECK-DAG: ldrb w[[ext5:[0-9]+]], [sp]
+; CHECK-DAG: and x[[ext5]], x[[ext5]], #0x1
+
 %ext_bool = zext i1 %bool to i64
 store volatile i64 %ext_bool, i64* @var64, align 8
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
-
-; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
-; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext5]], [{{x[0-9]+}}, :lo12:var64]
 %ext_char = zext i8 %char to i64
 store volatile i64 %ext_char, i64* @var64, align 8
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext3]], [{{x[0-9]+}}, :lo12:var64]
 %ext_short = zext i16 %short to i64
 store volatile i64 %ext_short, i64* @var64, align 8
-; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext2]], [{{x[0-9]+}}, :lo12:var64]
 %ext_int = zext i32 %int to i64
 store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext1]], [{{x[0-9]+}}, :lo12:var64]
 store volatile i64 %long, i64* @var64, align 8
-; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext4]], [{{x[0-9]+}}, :lo12:var64]
 ret void
 }
Index: test/CodeGen/AArch64/arm64-ldp-cluster.ll
===================================================================
--- test/CodeGen/AArch64/arm64-ldp-cluster.ll
+++ test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -67,14 +67,14 @@
 ; Test sext + zext clustering.
; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldp_half_sext_zext_int:%bb.0 -; CHECK: Cluster ld/st SU(3) - SU(4) -; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui -; CHECK: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; CHECK: Cluster ld/st SU(4) - SU(3) +; CHECK: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui ; EXYNOSM1: ********** MI Scheduling ********** ; EXYNOSM1-LABEL: ldp_half_sext_zext_int:%bb.0 -; EXYNOSM1: Cluster ld/st SU(3) - SU(4) -; EXYNOSM1: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui -; EXYNOSM1: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; EXYNOSM1: Cluster ld/st SU(4) - SU(3) +; EXYNOSM1: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; EXYNOSM1: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { %tmp0 = load i64, i64* %q, align 4 %tmp = load i32, i32* %p, align 4 Index: test/CodeGen/ARM/vector-load.ll =================================================================== --- test/CodeGen/ARM/vector-load.ll +++ test/CodeGen/ARM/vector-load.ll @@ -240,9 +240,9 @@ ;CHECK-LABEL: zextload_v8i8tov8i32_fake_update: ;CHECK: ldr r[[PTRREG:[0-9]+]], [r0] ;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32] -;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16 ;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} ;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} +;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16 ;CHECK: str r[[INCREG]], [r0] %A = load <4 x i8>*, <4 x i8>** %ptr %lA = load <4 x i8>, <4 x i8>* %A, align 4 Index: test/CodeGen/X86/avg.ll =================================================================== --- test/CodeGen/X86/avg.ll +++ test/CodeGen/X86/avg.ll @@ -2408,71 +2408,70 @@ ; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx ; AVX2-NEXT: subq $16, %rsp +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpextrq $1, %xmm4, %rbx +; AVX2-NEXT: vmovq %xmm4, %rbp +; AVX2-NEXT: vpextrq $1, %xmm3, %rdi +; AVX2-NEXT: vmovq %xmm3, %rcx +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-NEXT: vpextrq $1, %xmm3, %rdx +; AVX2-NEXT: vmovq %xmm3, %r9 +; AVX2-NEXT: vpextrq $1, %xmm2, %r11 +; AVX2-NEXT: vmovq %xmm2, %r12 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrq $1, %xmm3, %rcx -; AVX2-NEXT: vmovq %xmm3, %rax -; AVX2-NEXT: vpextrq $1, %xmm2, %rbx -; AVX2-NEXT: vmovq %xmm2, %rdx +; AVX2-NEXT: vpextrq $1, %xmm3, %r15 +; AVX2-NEXT: 
vmovq %xmm3, %rsi +; AVX2-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrq $1, %xmm2, %rdi -; AVX2-NEXT: vmovq %xmm2, %r11 -; AVX2-NEXT: vpextrq $1, %xmm1, %r13 -; AVX2-NEXT: vmovq %xmm1, %r12 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrq $1, %xmm2, %rbp -; AVX2-NEXT: vmovq %xmm2, %r10 -; AVX2-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX2-NEXT: vpextrq $1, %xmm4, %r15 -; AVX2-NEXT: addq %rcx, %r15 -; AVX2-NEXT: vmovq %xmm4, %r9 -; AVX2-NEXT: addq %rax, %r9 -; AVX2-NEXT: vpextrq $1, %xmm3, %rax +; AVX2-NEXT: vpextrq $1, %xmm4, %rax ; AVX2-NEXT: addq %rbx, %rax ; AVX2-NEXT: movq %rax, %rbx -; AVX2-NEXT: vmovq %xmm3, %rax -; AVX2-NEXT: addq %rdx, %rax -; AVX2-NEXT: movq %rax, %r8 +; AVX2-NEXT: vmovq %xmm4, %r13 +; AVX2-NEXT: addq %rbp, %r13 +; AVX2-NEXT: vpextrq $1, %xmm3, %r10 +; AVX2-NEXT: addq %rdi, %r10 +; AVX2-NEXT: vmovq %xmm3, %r14 +; AVX2-NEXT: addq %rcx, %r14 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2-NEXT: vpextrq $1, %xmm3, %rax -; AVX2-NEXT: addq %rdi, %rax +; AVX2-NEXT: addq %rdx, %rax ; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: vmovq %xmm3, %rax +; AVX2-NEXT: vmovq %xmm3, %r8 +; AVX2-NEXT: addq %r9, %r8 +; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: addq %r11, %rax ; AVX2-NEXT: movq %rax, %r11 -; AVX2-NEXT: vpextrq $1, %xmm2, %r14 -; AVX2-NEXT: addq %r13, %r14 ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: addq %r12, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; 
AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2-NEXT: vpextrq $1, %xmm3, %rax -; AVX2-NEXT: addq %rbp, %rax +; AVX2-NEXT: addq %r15, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: vmovq %xmm3, %rax -; AVX2-NEXT: addq %r10, %rax +; AVX2-NEXT: addq %rsi, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload @@ -2480,36 +2479,36 @@ ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX2-NEXT: vpextrq $1, %xmm2, %rbp ; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; AVX2-NEXT: vmovq %xmm2, %r10 -; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload -; AVX2-NEXT: vpextrq $1, %xmm0, %rax -; AVX2-NEXT: vpextrq $1, %xmm1, %rdi +; AVX2-NEXT: vmovq %xmm2, %r9 +; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vpextrq $1, %xmm0, %rdi ; AVX2-NEXT: addq %rax, %rdi -; AVX2-NEXT: vmovq %xmm0, %rdx -; AVX2-NEXT: vmovq %xmm1, %rsi +; AVX2-NEXT: vmovq %xmm1, %rdx +; AVX2-NEXT: vmovq %xmm0, %rsi ; AVX2-NEXT: addq %rdx, %rsi -; AVX2-NEXT: addq $-1, %r15 -; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %rbx +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %eax ; AVX2-NEXT: adcq $-1, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r9 -; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %r13 +; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %eax ; AVX2-NEXT: adcq $-1, %rax ; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill -; AVX2-NEXT: addq $-1, %rbx -; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %r10 +; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %eax ; AVX2-NEXT: adcq $-1, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r8 -; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %r14 +; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %r13d ; AVX2-NEXT: adcq $-1, %r13 ; AVX2-NEXT: addq $-1, %rcx @@ -2517,12 +2516,12 @@ ; AVX2-NEXT: movl $0, %eax ; AVX2-NEXT: adcq $-1, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r11 -; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %r8 +; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %r15d ; AVX2-NEXT: adcq $-1, %r15 -; AVX2-NEXT: addq $-1, %r14 -; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte 
Spill +; AVX2-NEXT: addq $-1, %r11 +; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %ebx ; AVX2-NEXT: adcq $-1, %rbx ; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill @@ -2546,9 +2545,9 @@ ; AVX2-NEXT: addq $-1, %rbp ; AVX2-NEXT: movl $0, %r14d ; AVX2-NEXT: adcq $-1, %r14 -; AVX2-NEXT: addq $-1, %r10 -; AVX2-NEXT: movl $0, %r9d -; AVX2-NEXT: adcq $-1, %r9 +; AVX2-NEXT: addq $-1, %r9 +; AVX2-NEXT: movl $0, %r10d +; AVX2-NEXT: adcq $-1, %r10 ; AVX2-NEXT: addq $-1, %rdi ; AVX2-NEXT: movl $0, %edx ; AVX2-NEXT: adcq $-1, %rdx @@ -2558,7 +2557,7 @@ ; AVX2-NEXT: shldq $63, %rsi, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: shldq $63, %rdi, %rdx -; AVX2-NEXT: shldq $63, %r10, %r9 +; AVX2-NEXT: shldq $63, %r9, %r10 ; AVX2-NEXT: shldq $63, %rbp, %r14 ; AVX2-NEXT: shldq $63, %rcx, %r11 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload @@ -2566,8 +2565,8 @@ ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; AVX2-NEXT: shldq $63, %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT: shldq $63, %rcx, %r10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; AVX2-NEXT: shldq $63, %rcx, %r9 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; AVX2-NEXT: shldq $63, %rcx, %r8 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload @@ -2596,13 +2595,13 @@ ; AVX2-NEXT: vmovq %r15, %xmm13 ; AVX2-NEXT: vmovq %rbx, %xmm14 ; AVX2-NEXT: vmovq %r8, %xmm15 -; AVX2-NEXT: vmovq %r10, %xmm0 +; AVX2-NEXT: vmovq %r9, %xmm0 ; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload ; AVX2-NEXT: # xmm1 = mem[0],zero ; AVX2-NEXT: vmovq %r12, %xmm2 ; AVX2-NEXT: vmovq %r11, %xmm3 ; AVX2-NEXT: vmovq %r14, %xmm4 -; AVX2-NEXT: vmovq %r9, %xmm5 +; AVX2-NEXT: vmovq %r10, %xmm5 ; AVX2-NEXT: vmovq %rdx, %xmm6 ; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload ; AVX2-NEXT: # xmm7 = mem[0],zero @@ -2658,58 +2657,58 @@ ; AVX512-NEXT: pushq %rbx ; AVX512-NEXT: subq $24, %rsp ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero +; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512-NEXT: vpextrq $1, %xmm4, %rbx +; AVX512-NEXT: vmovq %xmm4, %rbp +; AVX512-NEXT: vpextrq $1, %xmm3, %rdi +; AVX512-NEXT: vmovq %xmm3, %rsi ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero ; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512-NEXT: vpextrq $1, %xmm3, %rcx -; AVX512-NEXT: 
vmovq %xmm3, %rax -; AVX512-NEXT: vpextrq $1, %xmm2, %rbx -; AVX512-NEXT: vmovq %xmm2, %rbp -; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512-NEXT: vpextrq $1, %xmm2, %rdi -; AVX512-NEXT: vmovq %xmm2, %r8 -; AVX512-NEXT: vpextrq $1, %xmm1, %r13 -; AVX512-NEXT: vmovq %xmm1, %r12 +; AVX512-NEXT: vpextrq $1, %xmm3, %rdx +; AVX512-NEXT: vmovq %xmm3, %r8 +; AVX512-NEXT: vpextrq $1, %xmm2, %r13 +; AVX512-NEXT: vmovq %xmm2, %r12 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512-NEXT: vpextrq $1, %xmm2, %r15 -; AVX512-NEXT: vmovq %xmm2, %r14 -; AVX512-NEXT: vpextrq $1, %xmm1, %rdx -; AVX512-NEXT: vmovq %xmm1, %r9 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512-NEXT: vpextrq $1, %xmm3, %r15 +; AVX512-NEXT: vmovq %xmm3, %r14 +; AVX512-NEXT: vpextrq $1, %xmm2, %r9 +; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero ; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: addq %rcx, %rsi -; AVX512-NEXT: vmovq %xmm4, %rcx -; AVX512-NEXT: addq %rax, %rcx -; AVX512-NEXT: vpextrq $1, %xmm3, %rax +; AVX512-NEXT: vpextrq $1, %xmm4, %rax ; AVX512-NEXT: addq %rbx, %rax ; AVX512-NEXT: movq %rax, %rbx -; AVX512-NEXT: vmovq %xmm3, %rax +; AVX512-NEXT: vmovq %xmm4, %rax ; AVX512-NEXT: addq %rbp, %rax -; AVX512-NEXT: movq %rax, %r10 -; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512-NEXT: movq %rax, %rbp ; AVX512-NEXT: vpextrq $1, %xmm3, %rax ; AVX512-NEXT: addq %rdi, %rax ; AVX512-NEXT: movq %rax, %rdi +; AVX512-NEXT: vmovq %xmm3, %r10 +; AVX512-NEXT: addq %rsi, %r10 +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512-NEXT: vpextrq $1, %xmm3, %rcx +; AVX512-NEXT: addq %rdx, %rcx ; AVX512-NEXT: vmovq %xmm3, %rax ; AVX512-NEXT: addq %r8, %rax ; AVX512-NEXT: movq %rax, %r8 -; 
AVX512-NEXT: vpextrq $1, %xmm2, %rbp -; AVX512-NEXT: addq %r13, %rbp +; AVX512-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512-NEXT: addq %r13, %rsi ; AVX512-NEXT: vmovq %xmm2, %r11 ; AVX512-NEXT: addq %r12, %r11 ; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 @@ -2724,10 +2723,10 @@ ; AVX512-NEXT: addq %r14, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: vpextrq $1, %xmm2, %rax -; AVX512-NEXT: addq %rdx, %rax +; AVX512-NEXT: addq %r9, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: vmovq %xmm2, %rax -; AVX512-NEXT: addq %r9, %rax +; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 @@ -2742,28 +2741,28 @@ ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vmovq %xmm1, %rdx ; AVX512-NEXT: addq %rax, %rdx -; AVX512-NEXT: addq $-1, %rsi -; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addq $-1, %rbx +; AVX512-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: addq $-1, %rcx -; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addq $-1, %rbp +; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: addq $-1, %rbx -; AVX512-NEXT: movq %rbx, (%rsp) # 8-byte Spill +; AVX512-NEXT: addq $-1, %rdi +; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax -; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill ; AVX512-NEXT: addq $-1, %r10 ; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: addq $-1, %rdi -; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addq $-1, %rcx +; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -2772,8 +2771,8 @@ ; AVX512-NEXT: movl $0, %eax ; AVX512-NEXT: adcq $-1, %rax ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: addq $-1, %rbp -; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512-NEXT: addq $-1, %rsi +; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: movl $0, %r13d ; AVX512-NEXT: adcq $-1, %r13 ; AVX512-NEXT: addq $-1, %r11 @@ -2833,8 +2832,8 @@ ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; AVX512-NEXT: shldq $63, %rdx, %rax -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; AVX512-NEXT: movq (%rsp), %rdx # 8-byte Reload +; AVX512-NEXT: movq (%rsp), %r14 # 8-byte Reload +; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; AVX512-NEXT: shldq $63, %rdx, %r14 ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), 
%rdx # 8-byte Reload Index: test/CodeGen/X86/dagcombine-cse.ll =================================================================== --- test/CodeGen/X86/dagcombine-cse.ll +++ test/CodeGen/X86/dagcombine-cse.ll @@ -24,13 +24,13 @@ ; X64-NEXT: imull %ecx, %esi ; X64-NEXT: leal (%rsi,%rdx), %eax ; X64-NEXT: cltq +; X64-NEXT: movl (%rdi,%rax), %eax ; X64-NEXT: leal 4(%rsi,%rdx), %ecx ; X64-NEXT: movslq %ecx, %rcx ; X64-NEXT: movzwl (%rdi,%rcx), %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: movl (%rdi,%rax), %eax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: movq %rax, %xmm0 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: movq %rcx, %xmm0 ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7] ; X64-NEXT: movd %xmm0, %eax Index: test/CodeGen/X86/fold-zext-trunc.ll =================================================================== --- test/CodeGen/X86/fold-zext-trunc.ll +++ test/CodeGen/X86/fold-zext-trunc.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s -check-prefix=ASM +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after livedebugvalues -o - | FileCheck %s -check-prefix=MIR ; PR9055 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" target triple = "i686-pc-linux-gnu" @@ -7,17 +8,51 @@ @g_98 = common global %struct.S0 zeroinitializer, align 4 -define void @foo() nounwind { -; CHECK: movzbl -; CHECK-NOT: movzbl -; CHECK: calll +define void @foo() nounwind !dbg !5 { +; ASM: movzbl +; ASM-NOT: movzbl +; ASM: calll entry: - %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0), align 4 - %tmp54 = zext i8 %tmp17 to i32 - %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4 - %conv.i = trunc i32 %foo to i8 - tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind - ret void + %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0), align 4, !dbg !14 + %tmp54 = zext i8 %tmp17 to i32, !dbg !15 + %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4, !dbg !16 +; MIR: renamable $edi = MOVZX32rr8 renamable $al, debug-location !16 + %conv.i = trunc i32 %foo to i8, !dbg !17 + + tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) #0, !dbg !18 + call void @llvm.dbg.value(metadata i8 %tmp17, metadata !8, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i32 %tmp54, metadata !10, metadata !DIExpression()), !dbg !15 + call void @llvm.dbg.value(metadata i32 %foo, metadata !12, metadata !DIExpression()), !dbg !16 + call void @llvm.dbg.value(metadata i8 %conv.i, metadata !13, metadata !DIExpression()), !dbg !17 + ret void, !dbg !19 } declare void @func_12(i32, i8 zeroext) + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/test/CodeGen/X86/fold-zext-trunc.ll", directory: "/") +!2 = !{} +!3 = !{i32 6} +!4 = !{i32 4} +!5 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: 
true, unit: !0, variables: !7) +!6 = !DISubroutineType(types: !2) +!7 = !{!8, !10, !12, !13} +!8 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !9) +!9 = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned) +!10 = !DILocalVariable(name: "2", scope: !5, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!12 = !DILocalVariable(name: "3", scope: !5, file: !1, line: 3, type: !11) +!13 = !DILocalVariable(name: "4", scope: !5, file: !1, line: 4, type: !9) +!14 = !DILocation(line: 1, column: 1, scope: !5) +!15 = !DILocation(line: 2, column: 1, scope: !5) +!16 = !DILocation(line: 3, column: 1, scope: !5) +!17 = !DILocation(line: 4, column: 1, scope: !5) +!18 = !DILocation(line: 5, column: 1, scope: !5) +!19 = !DILocation(line: 6, column: 1, scope: !5) +!20 = !{i32 2, !"Debug Info Version", i32 3} +!llvm.module.flags = !{!20} Index: test/CodeGen/X86/known-bits-vector.ll =================================================================== --- test/CodeGen/X86/known-bits-vector.ll +++ test/CodeGen/X86/known-bits-vector.ll @@ -51,8 +51,8 @@ ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovd %eax, %xmm0 -; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; X32-NEXT: vmovd %ecx, %xmm0 +; X32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] ; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -4,8 +4,8 @@ define i64 @test1(i32 %xx, i32 %test) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll %cl, %eax Index: test/CodeGen/X86/load-combine.ll =================================================================== --- test/CodeGen/X86/load-combine.ll +++ test/CodeGen/X86/load-combine.ll @@ -915,7 +915,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 12(%eax,%ecx), %eax +; CHECK-NEXT: movl 12(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_base_offset_index: @@ -960,7 +960,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 13(%eax,%ecx), %eax +; CHECK-NEXT: movl 13(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2: @@ -1016,7 +1016,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 12(%eax,%ecx), %eax +; CHECK-NEXT: movl 12(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_zaext_loads: @@ -1072,7 +1072,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 12(%eax,%ecx), %eax +; CHECK-NEXT: movl 12(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_zsext_loads: Index: test/CodeGen/X86/mulx32.ll =================================================================== --- test/CodeGen/X86/mulx32.ll +++ test/CodeGen/X86/mulx32.ll @@ -17,8 +17,8 @@ define i64 @f2(i32 %a, i32* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), 
%eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: mulxl (%eax), %eax, %edx ; CHECK-NEXT: retl %b = load i32, i32* %p Index: test/CodeGen/X86/promote-vec3.ll =================================================================== --- test/CodeGen/X86/promote-vec3.ll +++ test/CodeGen/X86/promote-vec3.ll @@ -9,10 +9,10 @@ ; SSE3-LABEL: zext_i8: ; SSE3: # %bb.0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: movd %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $1, %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; SSE3-NEXT: movd %edx, %xmm0 +; SSE3-NEXT: pinsrw $1, %ecx, %xmm0 ; SSE3-NEXT: pinsrw $2, %eax, %xmm0 ; SSE3-NEXT: pextrw $0, %xmm0, %eax ; SSE3-NEXT: pextrw $1, %xmm0, %edx @@ -71,10 +71,10 @@ ; SSE3-LABEL: sext_i8: ; SSE3: # %bb.0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: movd %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: pinsrw $1, %eax, %xmm0 -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; SSE3-NEXT: movd %edx, %xmm0 +; SSE3-NEXT: pinsrw $1, %ecx, %xmm0 ; SSE3-NEXT: pinsrw $2, %eax, %xmm0 ; SSE3-NEXT: psllw $8, %xmm0 ; SSE3-NEXT: psraw $8, %xmm0 Index: test/CodeGen/X86/widen_conv-3.ll =================================================================== --- test/CodeGen/X86/widen_conv-3.ll +++ test/CodeGen/X86/widen_conv-3.ll @@ -60,13 +60,13 @@ ; X86-SSE2-NEXT: movd %edx, %xmm0 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: movdqa %xmm0, (%esp) ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shll $8, %edx ; X86-SSE2-NEXT: movzbl (%esp), %esi ; X86-SSE2-NEXT: orl %edx, %esi ; X86-SSE2-NEXT: movd %esi, %xmm0 -; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -108,13 +108,13 @@ ; X64-SSE2-NEXT: movq %rax, %xmm0 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSE2-NEXT: shll $8, %eax -; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE2-NEXT: orl %eax, %ecx -; X64-SSE2-NEXT: movd %ecx, %xmm0 ; X64-SSE2-NEXT: movzbl 2(%rsi), %eax +; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx +; X64-SSE2-NEXT: shll $8, %ecx +; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; X64-SSE2-NEXT: orl %ecx, %edx +; X64-SSE2-NEXT: movd %edx, %xmm0 ; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] Index: test/CodeGen/X86/widen_conv-4.ll =================================================================== --- test/CodeGen/X86/widen_conv-4.ll +++ test/CodeGen/X86/widen_conv-4.ll @@ -86,13 +86,13 @@ ; X86-SSE2-NEXT: movd %edx, %xmm0 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; 
X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: movdqa %xmm0, (%esp) ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shll $8, %edx ; X86-SSE2-NEXT: movzbl (%esp), %esi ; X86-SSE2-NEXT: orl %edx, %esi ; X86-SSE2-NEXT: movd %esi, %xmm0 -; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 ; X86-SSE2-NEXT: pxor %xmm1, %xmm1 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -133,13 +133,13 @@ ; X64-SSE2-NEXT: movq %rax, %xmm0 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSE2-NEXT: shll $8, %eax -; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE2-NEXT: orl %eax, %ecx -; X64-SSE2-NEXT: movd %ecx, %xmm0 ; X64-SSE2-NEXT: movzbl 2(%rsi), %eax +; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx +; X64-SSE2-NEXT: shll $8, %ecx +; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; X64-SSE2-NEXT: orl %ecx, %edx +; X64-SSE2-NEXT: movd %edx, %xmm0 ; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 ; X64-SSE2-NEXT: pxor %xmm1, %xmm1 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] Index: test/CodeGen/X86/win-smallparams.ll =================================================================== --- test/CodeGen/X86/win-smallparams.ll +++ test/CodeGen/X86/win-smallparams.ll @@ -57,11 +57,14 @@ ; WIN32: calll _manyargs ; WIN32-LABEL: _manyargs: -; WIN32-DAG: movsbl 4(%esp), -; WIN32-DAG: movswl 8(%esp), -; WIN32-DAG: movzbl 12(%esp), -; WIN32-DAG: movzwl 16(%esp), -; WIN32-DAG: movzbl 20(%esp), -; WIN32-DAG: movzwl 24(%esp), +; WIN32: pushl %ebx +; WIN32: pushl %edi +; WIN32: pushl %esi +; WIN32-DAG: movsbl 16(%esp), +; WIN32-DAG: movswl 20(%esp), +; WIN32-DAG: movzbl 24(%esp), +; WIN32-DAG: movzwl 28(%esp), +; WIN32-DAG: movzbl 32(%esp), +; WIN32-DAG: movzwl 36(%esp), ; WIN32: retl
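
Summary of the change: the functional edit is the DAGCombiner.cpp hunk at the top, where the zero-extending load created when a zext of a load is folded is now given the location of the load, SDLoc(LN0), rather than the location of the zext, SDLoc(N). The fold-zext-trunc.ll update adds debugify metadata and a MIR check that the resulting zero-extension carries the load's debug location (!16); the remaining test updates appear to be scheduling and register-allocation churn that follows from the changed SDLoc/IROrder of the new node. A minimal C++ sketch of the pattern, reusing only names that already appear in the hunk (N is the zext node, N0 its load operand, VT the result type):

  // Sketch of the combine step touched above; not the full DAGCombiner logic.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Create the ZEXTLOAD at the load's own location so the merged node (and
  // the machine instruction selected from it) keeps the load's debug info.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                   LN0->getChain(), LN0->getBasePtr(),
                                   N0.getValueType(), LN0->getMemOperand());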