Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -34,6 +34,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -7630,45 +7631,13 @@
   SDValue Loc = LD->getOperand(1);
   SDValue BaseLoc = Base->getOperand(1);
-  if (Loc.getOpcode() == ISD::FrameIndex) {
-    if (BaseLoc.getOpcode() != ISD::FrameIndex)
-      return false;
-    const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
-    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
-    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
-    int FS = MFI.getObjectSize(FI);
-    int BFS = MFI.getObjectSize(BFI);
-    if (FS != BFS || FS != (int)Bytes) return false;
-    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
-  }
-
-  // Handle X + C.
-  if (isBaseWithConstantOffset(Loc)) {
-    int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
-    if (Loc.getOperand(0) == BaseLoc) {
-      // If the base location is a simple address with no offset itself, then
-      // the second load's first add operand should be the base address.
-      if (LocOffset == Dist * (int)Bytes)
-        return true;
-    } else if (isBaseWithConstantOffset(BaseLoc)) {
-      // The base location itself has an offset, so subtract that value from the
-      // second load's offset before comparing to distance * size.
-      int64_t BOffset =
-        cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
-      if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
-        if ((LocOffset - BOffset) == Dist * (int)Bytes)
-          return true;
-      }
-    }
-  }
-  const GlobalValue *GV1 = nullptr;
-  const GlobalValue *GV2 = nullptr;
-  int64_t Offset1 = 0;
-  int64_t Offset2 = 0;
-  bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
-  bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
-  if (isGA1 && isGA2 && GV1 == GV2)
-    return Offset1 == (Offset2 + Dist*Bytes);
+
+  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
+  auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+
+  int64_t Offset = 0;
+  if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
+    return (Dist * Bytes == Offset);
   return false;
 }
Index: lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -60,12 +60,18 @@
   int64_t Offset = 0;
   bool IsIndexSignExt = false;
 
-  // Consume constant adds
-  while (Base->getOpcode() == ISD::ADD &&
-         isa<ConstantSDNode>(Base->getOperand(1))) {
-    int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue();
-    Offset += POffset;
-    Base = Base->getOperand(0);
+  // Consume constant adds & ors with appropriate masking.
+  while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+      // Only consider ORs which act as adds.
+      if (Base->getOpcode() == ISD::OR &&
+          !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
+        break;
+      Offset += C->getSExtValue();
+      Base = Base->getOperand(0);
+      continue;
+    }
+    break;
   }
 
   if (Base->getOpcode() == ISD::ADD) {
Index: test/CodeGen/BPF/undef.ll
===================================================================
--- test/CodeGen/BPF/undef.ll
+++ test/CodeGen/BPF/undef.ll
@@ -13,36 +13,30 @@
 ; Function Attrs: nounwind uwtable
 define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
-; CHECK: r2 = r10
-; CHECK: r2 += -2
-; CHECK: r1 = 0
-; CHECK: *(u16 *)(r2 + 6) = r1
-; CHECK: *(u16 *)(r2 + 4) = r1
-; CHECK: *(u16 *)(r2 + 2) = r1
-; CHECK: r2 = 6
-; CHECK: *(u8 *)(r10 - 7) = r2
-; CHECK: r2 = 5
-; CHECK: *(u8 *)(r10 - 8) = r2
-; CHECK: r2 = 7
-; CHECK: *(u8 *)(r10 - 6) = r2
-; CHECK: r2 = 8
-; CHECK: *(u8 *)(r10 - 5) = r2
-; CHECK: r2 = 9
-; CHECK: *(u8 *)(r10 - 4) = r2
-; CHECK: r2 = 10
-; CHECK: *(u8 *)(r10 - 3) = r2
-; CHECK: *(u16 *)(r10 + 24) = r1
-; CHECK: *(u16 *)(r10 + 22) = r1
-; CHECK: *(u16 *)(r10 + 20) = r1
-; CHECK: *(u16 *)(r10 + 18) = r1
-; CHECK: *(u16 *)(r10 + 16) = r1
-; CHECK: *(u16 *)(r10 + 14) = r1
-; CHECK: *(u16 *)(r10 + 12) = r1
-; CHECK: *(u16 *)(r10 + 10) = r1
-; CHECK: *(u16 *)(r10 + 8) = r1
-; CHECK: *(u16 *)(r10 + 6) = r1
-; CHECK: *(u16 *)(r10 - 2) = r1
-; CHECK: *(u16 *)(r10 + 26) = r1
+; CHECK: r1 = r10
+; CHECK: r1 += -2
+; CHECK: r2 = 0
+; CHECK: *(u16 *)(r1 + 6) = r2
+; CHECK: *(u16 *)(r1 + 4) = r2
+; CHECK: *(u16 *)(r1 + 2) = r2
+; CHECK: r1 = 134678021
+; CHECK: *(u32 *)(r10 - 8) = r1
+; CHECK: r1 = 9
+; CHECK: *(u8 *)(r10 - 4) = r1
+; CHECK: r1 = 10
+; CHECK: *(u8 *)(r10 - 3) = r1
+; CHECK: *(u16 *)(r10 + 24) = r2
+; CHECK: *(u16 *)(r10 + 22) = r2
+; CHECK: *(u16 *)(r10 + 20) = r2
+; CHECK: *(u16 *)(r10 + 18) = r2
+; CHECK: *(u16 *)(r10 + 16) = r2
+; CHECK: *(u16 *)(r10 + 14) = r2
+; CHECK: *(u16 *)(r10 + 12) = r2
+; CHECK: *(u16 *)(r10 + 10) = r2
+; CHECK: *(u16 *)(r10 + 8) = r2
+; CHECK: *(u16 *)(r10 + 6) = r2
+; CHECK: *(u16 *)(r10 - 2) = r2
+; CHECK: *(u16 *)(r10 + 26) = r2
 ; CHECK: r2 = r10
 ; CHECK: r2 += -8
 ; CHECK: r1 = ll
Index: test/CodeGen/MSP430/Inst16mm.ll
===================================================================
--- test/CodeGen/MSP430/Inst16mm.ll
+++ test/CodeGen/MSP430/Inst16mm.ll
@@ -64,6 +64,6 @@
  %0 = load i16, i16* %retval ; <i16> [#uses=1]
  ret i16 %0
 ; CHECK-LABEL: mov2:
-; CHECK: mov.w 0(r1), 4(r1)
-; CHECK: mov.w 2(r1), 6(r1)
+; CHECK-DAG: mov.w 2(r1), 6(r1)
+; CHECK-DAG: mov.w 0(r1), 4(r1)
 }
Index: test/CodeGen/X86/bswap-wide-int.ll
===================================================================
--- test/CodeGen/X86/bswap-wide-int.ll
+++ test/CodeGen/X86/bswap-wide-int.ll
@@ -71,8 +71,8 @@
 ; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
 ; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
-; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
-; X86-MOVBE-NEXT: movbel %edx, (%eax)
+; X86-MOVBE-NEXT: movbel %edx, 4(%eax)
+; X86-MOVBE-NEXT: movbel %ecx, (%eax)
 ; X86-MOVBE-NEXT: popl %esi
 ; X86-MOVBE-NEXT: popl %edi
 ; X86-MOVBE-NEXT: retl $4
Index: test/CodeGen/X86/build-vector-128.ll
===================================================================
--- test/CodeGen/X86/build-vector-128.ll
+++ test/CodeGen/X86/build-vector-128.ll
@@ -72,12 +72,10 @@
 }
 
 define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
-; SSE2-32-LABEL: test_buildvector_v2i64:
-; SSE2-32: # BB#0:
-; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-32-NEXT: retl
+; SSE-32-LABEL: test_buildvector_v2i64:
+; SSE-32: # BB#0:
+; SSE-32-NEXT: movups {{[0-9]+}}(%esp), %xmm0
+; SSE-32-NEXT: retl
 ;
 ; SSE-64-LABEL: test_buildvector_v2i64:
 ; SSE-64: # BB#0:
@@ -86,20 +84,9 @@
 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-64-NEXT: retq
 ;
-; SSE41-32-LABEL: test_buildvector_v2i64:
-; SSE41-32: # BB#0:
-; SSE41-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-32-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT: pinsrd $2, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT: retl
-;
 ; AVX-32-LABEL: test_buildvector_v2i64:
 ; AVX-32: # BB#0:
-; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
 ; AVX-32-NEXT: retl
 ;
 ; AVX-64-LABEL: test_buildvector_v2i64:
Index: test/CodeGen/X86/build-vector-256.ll
===================================================================
--- test/CodeGen/X86/build-vector-256.ll
+++ test/CodeGen/X86/build-vector-256.ll
@@ -51,18 +51,10 @@
 }
 
 define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
-; AVX1-32-LABEL: test_buildvector_v4i64:
-; AVX1-32: # BB#0:
-; AVX1-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-32-NEXT: retl
+; AVX-32-LABEL: test_buildvector_v4i64:
+; AVX-32: # BB#0:
+; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %ymm0
+; AVX-32-NEXT: retl
 ;
 ; AVX1-64-LABEL: test_buildvector_v4i64:
 ; AVX1-64: # BB#0:
@@ -75,19 +67,6 @@
 ; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-64-NEXT: retq
 ;
-; AVX2-32-LABEL: test_buildvector_v4i64:
-; AVX2-32: # BB#0:
-; AVX2-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-32-NEXT: retl
-;
 ; AVX2-64-LABEL: test_buildvector_v4i64:
 ; AVX2-64: # BB#0:
 ; AVX2-64-NEXT: vmovq %rcx, %xmm0
Index: test/CodeGen/X86/build-vector-512.ll
===================================================================
--- test/CodeGen/X86/build-vector-512.ll
+++ test/CodeGen/X86/build-vector-512.ll
@@ -79,25 +79,7 @@
 define <8 x i64> @test_buildvector_v8i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
 ; AVX-32-LABEL: test_buildvector_v8i64:
 ; AVX-32: # BB#0:
-; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %zmm0
 ; AVX-32-NEXT: retl
 ;
 ; AVX-64-LABEL: test_buildvector_v8i64:
Index: test/CodeGen/X86/clear_upper_vector_element_bits.ll
===================================================================
--- test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -1063,87 +1063,89 @@
 ;
 ; AVX1-LABEL: _clearupper32xi8b:
 ; AVX1: # BB#0:
+; AVX1-NEXT: pushq %rbp
+; AVX1-NEXT: pushq %r15
 ; AVX1-NEXT: pushq %r14
+; AVX1-NEXT: pushq %r13
+; AVX1-NEXT: pushq %r12
 ; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r14
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: movq %rcx, %r8
+; AVX1-NEXT: movq %rcx, %r9
+; AVX1-NEXT: movq %rcx, %r10
+; AVX1-NEXT: movq %rcx, %r11
+; AVX1-NEXT: movq %rcx, %r14
+; AVX1-NEXT: movq %rcx, %r15
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX1-NEXT: movq %rdx, %r8
-; AVX1-NEXT: movq %rdx, %r9
-; AVX1-NEXT: movq %rdx, %r11
-; AVX1-NEXT: movq %rdx, %rsi
-; AVX1-NEXT: movq %rdx, %rdi
-; AVX1-NEXT: movq %rdx, %rcx
+; AVX1-NEXT: movq %rdx, %r12
+; AVX1-NEXT: movq %rdx, %r13
+; AVX1-NEXT: movq %rdx, %rbx
 ; AVX1-NEXT: movq %rdx, %rax
+; AVX1-NEXT: movq %rdx, %rdi
+; AVX1-NEXT: movq %rdx, %rsi
+; AVX1-NEXT: movq %rdx, %rbp
 ; AVX1-NEXT: andb $15, %dl
 ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: andb $15, %al
-; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %r10
-; AVX1-NEXT: shrq $48, %rcx
+; AVX1-NEXT: movq %rcx, %rdx
 ; AVX1-NEXT: andb $15, %cl
 ; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rdx
-; AVX1-NEXT: shrq $40, %rdi
-; AVX1-NEXT: andb $15, %dil
-; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rax
-; AVX1-NEXT: shrq $32, %rsi
+; AVX1-NEXT: shrq $56, %rbp
+; AVX1-NEXT: andb $15, %bpl
+; AVX1-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $48, %rsi
 ; AVX1-NEXT: andb $15, %sil
 ; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rcx
-; AVX1-NEXT: shrq $24, %r11
-; AVX1-NEXT: andb $15, %r11b
-; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rsi
-; AVX1-NEXT: shrq $16, %r9
-; AVX1-NEXT: andb $15, %r9b
-; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rdi
-; AVX1-NEXT: shrq $8, %r8
-; AVX1-NEXT: andb $15, %r8b
-; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %r14, %rbx
-; AVX1-NEXT: andb $15, %r14b
-; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: shrq $8, %r10
-; AVX1-NEXT: shrq $16, %rdx
-; AVX1-NEXT: shrq $24, %rax
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: shrq $40, %rsi
-; AVX1-NEXT: shrq $48, %rdi
-; AVX1-NEXT: shrq $56, %rbx
-; AVX1-NEXT: andb $15, %bl
-; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $40, %rdi
 ; AVX1-NEXT: andb $15, %dil
 ; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %sil
-; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andb $15, %cl
-; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $32, %rax
 ; AVX1-NEXT: andb $15, %al
 ; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $24, %rbx
+; AVX1-NEXT: andb $15, %bl
+; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $16, %r13
+; AVX1-NEXT: andb $15, %r13b
+; AVX1-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $8, %r12
+; AVX1-NEXT: andb $15, %r12b
+; AVX1-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: shrq $8, %r8
+; AVX1-NEXT: shrq $16, %r9
+; AVX1-NEXT: shrq $24, %r10
+; AVX1-NEXT: shrq $32, %r11
+; AVX1-NEXT: shrq $40, %r14
+; AVX1-NEXT: shrq $48, %r15
+; AVX1-NEXT: shrq $56, %rdx
 ; AVX1-NEXT: andb $15, %dl
 ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %r15b
+; AVX1-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %r14b
+; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %r11b
+; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: andb $15, %r10b
 ; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %r9b
+; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andb $15, %r8b
+; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movq %rax, %r8
+; AVX1-NEXT: movq %rax, %rcx
 ; AVX1-NEXT: movq %rax, %rdx
 ; AVX1-NEXT: movq %rax, %rsi
 ; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: movl %eax, %ebx
-; AVX1-NEXT: movl %eax, %ecx
 ; AVX1-NEXT: vmovd %eax, %xmm1
 ; AVX1-NEXT: shrl $8, %eax
 ; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: shrl $24, %ebx
-; AVX1-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
+; AVX1-NEXT: shrl $16, %ebx
+; AVX1-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
+; AVX1-NEXT: shrl $24, %ebp
+; AVX1-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
 ; AVX1-NEXT: shrq $32, %rdi
 ; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
 ; AVX1-NEXT: shrq $40, %rsi
@@ -1153,8 +1155,8 @@
 ; AVX1-NEXT: shrq $48, %rdx
 ; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: shrq $56, %r8
-; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
+; AVX1-NEXT: shrq $56, %rcx
+; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
 ; AVX1-NEXT: movl %eax, %ecx
 ; AVX1-NEXT: shrl $8, %ecx
 ; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@@ -1222,92 +1224,98 @@
 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: popq %rbx
+; AVX1-NEXT: popq %r12
+; AVX1-NEXT: popq %r13
 ; AVX1-NEXT: popq %r14
+; AVX1-NEXT: popq %r15
+; AVX1-NEXT: popq %rbp
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: _clearupper32xi8b:
 ; AVX2: # BB#0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
 ; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
 ; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r14
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: movq %rcx, %r8
+; AVX2-NEXT: movq %rcx, %r9
+; AVX2-NEXT: movq %rcx, %r10
+; AVX2-NEXT: movq %rcx, %r11
+; AVX2-NEXT: movq %rcx, %r14
+; AVX2-NEXT: movq %rcx, %r15
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX2-NEXT: movq %rdx, %r8
-; AVX2-NEXT: movq %rdx, %r9
-; AVX2-NEXT: movq %rdx, %r11
-; AVX2-NEXT: movq %rdx, %rsi
-; AVX2-NEXT: movq %rdx, %rdi
-; AVX2-NEXT: movq %rdx, %rcx
+; AVX2-NEXT: movq %rdx, %r12
+; AVX2-NEXT: movq %rdx, %r13
+; AVX2-NEXT: movq %rdx, %rbx
 ; AVX2-NEXT: movq %rdx, %rax
+; AVX2-NEXT: movq %rdx, %rdi
+; AVX2-NEXT: movq %rdx, %rsi
+; AVX2-NEXT: movq %rdx, %rbp
 ; AVX2-NEXT: andb $15, %dl
 ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: andb $15, %al
-; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %r10
-; AVX2-NEXT: shrq $48, %rcx
+; AVX2-NEXT: movq %rcx, %rdx
 ; AVX2-NEXT: andb $15, %cl
 ; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rdx
-; AVX2-NEXT: shrq $40, %rdi
-; AVX2-NEXT: andb $15, %dil
-; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rax
-; AVX2-NEXT: shrq $32, %rsi
+; AVX2-NEXT: shrq $56, %rbp
+; AVX2-NEXT: andb $15, %bpl
+; AVX2-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $48, %rsi
 ; AVX2-NEXT: andb $15, %sil
 ; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rcx
-; AVX2-NEXT: shrq $24, %r11
-; AVX2-NEXT: andb $15, %r11b
-; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rsi
-; AVX2-NEXT: shrq $16, %r9
-; AVX2-NEXT: andb $15, %r9b
-; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rdi
-; AVX2-NEXT: shrq $8, %r8
-; AVX2-NEXT: andb $15, %r8b
-; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %r14, %rbx
-; AVX2-NEXT: andb $15, %r14b
-; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: shrq $8, %r10
-; AVX2-NEXT: shrq $16, %rdx
-; AVX2-NEXT: shrq $24, %rax
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: shrq $48, %rdi
-; AVX2-NEXT: shrq $56, %rbx
-; AVX2-NEXT: andb $15, %bl
-; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $40, %rdi
 ; AVX2-NEXT: andb $15, %dil
 ; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %sil
-; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andb $15, %cl
-; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $32, %rax
 ; AVX2-NEXT: andb $15, %al
 ; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $24, %rbx
+; AVX2-NEXT: andb $15, %bl
+; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $16, %r13
+; AVX2-NEXT: andb $15, %r13b
+; AVX2-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $8, %r12
+; AVX2-NEXT: andb $15, %r12b
+; AVX2-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: shrq $8, %r8
+; AVX2-NEXT: shrq $16, %r9
+; AVX2-NEXT: shrq $24, %r10
+; AVX2-NEXT: shrq $32, %r11
+; AVX2-NEXT: shrq $40, %r14
+; AVX2-NEXT: shrq $48, %r15
+; AVX2-NEXT: shrq $56, %rdx
 ; AVX2-NEXT: andb $15, %dl
 ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %r15b
+; AVX2-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %r14b
+; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %r11b
+; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: andb $15, %r10b
 ; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %r9b
+; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: andb $15, %r8b
+; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq %rax, %rcx
 ; AVX2-NEXT: movq %rax, %rdx
 ; AVX2-NEXT: movq %rax, %rsi
 ; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: movl %eax, %ebx
-; AVX2-NEXT: movl %eax, %ecx
 ; AVX2-NEXT: vmovd %eax, %xmm1
 ; AVX2-NEXT: shrl $8, %eax
 ; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: shrl $16, %ecx
-; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: shrl $24, %ebx
-; AVX2-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
+; AVX2-NEXT: shrl $16, %ebx
+; AVX2-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
+; AVX2-NEXT: shrl $24, %ebp
+; AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
 ; AVX2-NEXT: shrq $32, %rdi
 ; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
 ; AVX2-NEXT: shrq $40, %rsi
@@ -1317,8 +1325,8 @@
 ; AVX2-NEXT: shrq $48, %rdx
 ; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %r8
-; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
+; AVX2-NEXT: shrq $56, %rcx
+; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
 ; AVX2-NEXT: movl %eax, %ecx
 ; AVX2-NEXT: shrl $8, %ecx
 ; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@@ -1386,7 +1394,11 @@
 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
 ; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
 ; AVX2-NEXT: retq
   %x4 = bitcast <32 x i8> %0 to <64 x i4>
   %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
Index: test/CodeGen/X86/wide-integer-cmp.ll
===================================================================
--- test/CodeGen/X86/wide-integer-cmp.ll
+++ test/CodeGen/X86/wide-integer-cmp.ll
@@ -101,8 +101,8 @@
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: jge .LBB4_2
 ; CHECK-NEXT: # BB#1: # %bb1
 ; CHECK-NEXT: movl $1, %eax