Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -1548,26 +1548,7 @@ return false; } - if (DstVT == MVT::i64) { - // Handle extension to 64-bits via sub-register shenanigans. - unsigned MovInst; - - switch (SrcVT.SimpleTy) { - case MVT::i8: MovInst = X86::MOVZX32rr8; break; - case MVT::i16: MovInst = X86::MOVZX32rr16; break; - case MVT::i32: MovInst = X86::MOV32rr; break; - default: llvm_unreachable("Unexpected zext to i64 source type"); - } - - unsigned Result32 = createResultReg(&X86::GR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32) - .addReg(ResultReg); - - ResultReg = createResultReg(&X86::GR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), - ResultReg) - .addImm(0).addReg(Result32).addImm(X86::sub_32bit); - } else if (DstVT == MVT::i16) { + if (DstVT == MVT::i16) { // i8->i16 doesn't exist in the autogenerated isel table. Need to zero // extend to 32-bits and then extract down to 16-bits. unsigned Result32 = createResultReg(&X86::GR32RegClass); Index: lib/Target/X86/X86FixupSetCC.cpp =================================================================== --- lib/Target/X86/X86FixupSetCC.cpp +++ lib/Target/X86/X86FixupSetCC.cpp @@ -134,7 +134,8 @@ MachineInstr *ZExt = nullptr; for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) - if (Use.getOpcode() == X86::MOVZX32rr8) + if (Use.getOpcode() == X86::MOVZX32rr8 || + Use.getOpcode() == X86::MOVZX64rr8_alt) ZExt = &Use; if (!ZExt) @@ -162,14 +163,28 @@ ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass; unsigned ZeroReg = MRI->createVirtualRegister(RC); - unsigned InsertReg = MRI->createVirtualRegister(RC); + MRI->constrainRegClass(ZeroReg, + MRI->getRegClass(ZExt->getOperand(0).getReg())); // Initialize a register with 0. This must go before the eflags def BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), ZeroReg); + // If this is a 64-bit zero extend we need to wrap with a subreg_to_reg. + if (ZExt->getOpcode() == X86::MOVZX64rr8_alt) { + RC = &X86::GR64RegClass; + unsigned Reg = MRI->createVirtualRegister(RC); + BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), + TII->get(TargetOpcode::SUBREG_TO_REG), Reg) + .addImm(0).addReg(ZeroReg).addImm(X86::sub_32bit); + ZeroReg = Reg; + } + // X86 setcc only takes an output GR8, so fake a GR32 input by inserting // the setcc result into the low byte of the zeroed register. + unsigned InsertReg = MRI->createVirtualRegister(RC); + MRI->constrainRegClass(InsertReg, + MRI->getRegClass(ZExt->getOperand(0).getReg())); BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), TII->get(X86::INSERT_SUBREG), InsertReg) .addReg(ZeroReg) Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -1256,10 +1256,8 @@ def : Pat<(i32 (anyext GR16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; -def : Pat<(i64 (anyext GR8 :$src)), - (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>; -def : Pat<(i64 (anyext GR16:$src)), - (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>; +def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8_alt GR8 :$src)>; +def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16_alt GR16 :$src)>; def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>; @@ -1467,19 +1465,13 @@ // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (SUBREG_TO_REG (i64 0), - (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), - sub_32bit)>; + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), - (SUBREG_TO_REG (i64 0), - (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), - sub_32bit)>; + (MOVZX64rr16_alt (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), - (SUBREG_TO_REG (i64 0), - (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))), - sub_32bit)>; + (MOVZX64rr8_alt (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; } // AddedComplexity = 1 @@ -1540,6 +1532,12 @@ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>; def: Pat<(zextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; +def: Pat<(zextloadi64i8 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; +def: Pat<(zextloadi64i16 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; +def: Pat<(zextloadi64i32 addr:$src), + (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), Index: lib/Target/X86/X86InstrExtension.td =================================================================== --- lib/Target/X86/X86InstrExtension.td +++ lib/Target/X86/X86InstrExtension.td @@ -201,24 +201,31 @@ TB, Sched<[WriteALULd]>; } -// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a -// 32-bit register. -def : Pat<(i64 (zext GR8:$src)), - (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>; -def : Pat<(zextloadi64i8 addr:$src), - (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; - -def : Pat<(i64 (zext GR16:$src)), - (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>; -def : Pat<(zextloadi64i16 addr:$src), - (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; - -// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a -// SUBREG_TO_REG to utilize implicit zero-extension, however this isn't possible -// when the 32-bit value is defined by a truncate or is copied from something -// where the high bits aren't necessarily all zero. In such cases, we fall back -// to these explicit zext instructions. -def : Pat<(i64 (zext GR32:$src)), - (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>; -def : Pat<(i64 (zextloadi64i32 addr:$src)), - (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>; +let isPseudo = 1 in { + +// Use movzbl instead of movzbq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. +// This will be converted to MOVZX32rr8 after register allocation. +def MOVZX64rr8_alt : I<0, Pseudo, (outs GR64:$dst), (ins GR8 :$src), + "", [(set GR64:$dst, (zext GR8:$src))]>, + Sched<[WriteALU]>; +// Use movzwl instead of movzwq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. +// This will be converted to MOVZX32rr16 after register allocation. +def MOVZX64rr16_alt: I<0, Pseudo, (outs GR64:$dst), (ins GR16:$src), + "", [(set GR64:$dst, (zext GR16:$src))]>, + Sched<[WriteALU]>; + +// There's no movzlq instruction, but movl can be used for this purpose, using +// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero +// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit +// zero-extension, however this isn't possible when the 32-bit value is +// defined by a truncate or is copied from something where the high bits aren't +// necessarily all zero. In such cases, we fall back to these explicit zext +// instructions. +// This will be converted to MOV32rr after register allocation. +def MOVZX64rr32 : I<0, Pseudo, (outs GR64:$dst), (ins GR32:$src), + "", [(set GR64:$dst, (zext GR32:$src))]>, + Sched<[WriteALU]>; +} + Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -98,6 +98,7 @@ case X86::MOVSX32rr8: case X86::MOVZX32rr8: case X86::MOVSX64rr8: + case X86::MOVZX64rr8_alt: if (!Subtarget.is64Bit()) // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. @@ -106,7 +107,9 @@ case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: - case X86::MOVSX64rr32: { + case X86::MOVZX64rr16_alt: + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: { if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) // Be conservative. return false; @@ -119,14 +122,17 @@ case X86::MOVSX32rr8: case X86::MOVZX32rr8: case X86::MOVSX64rr8: + case X86::MOVZX64rr8_alt: SubIdx = X86::sub_8bit; break; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: + case X86::MOVZX64rr16_alt: SubIdx = X86::sub_16bit; break; case X86::MOVSX64rr32: + case X86::MOVZX64rr32: SubIdx = X86::sub_32bit; break; } @@ -4144,10 +4150,27 @@ return true; } + +static bool expandZeroExtend64(MachineInstrBuilder &MIB, + const MCInstrDesc &Desc) { + MIB->setDesc(Desc); + unsigned DestReg = MIB->getOperand(0).getReg(); + MIB->getOperand(0).setReg(getX86SubSuperRegister(DestReg, 32)); + MIB.addReg(DestReg, RegState::ImplicitDefine); + return true; +} + + bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { bool HasAVX = Subtarget.hasAVX(); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); switch (MI.getOpcode()) { + case X86::MOVZX64rr8_alt: + return expandZeroExtend64(MIB, get(X86::MOVZX32rr8)); + case X86::MOVZX64rr16_alt: + return expandZeroExtend64(MIB, get(X86::MOVZX32rr16)); + case X86::MOVZX64rr32: + return expandZeroExtend64(MIB, get(X86::MOV32rr)); case X86::MOV32r0: return Expand2AddrUndef(MIB, get(X86::XOR32rr)); case X86::MOV32r1: Index: test/CodeGen/X86/GlobalISel/add-ext.ll =================================================================== --- test/CodeGen/X86/GlobalISel/add-ext.ll +++ test/CodeGen/X86/GlobalISel/add-ext.ll @@ -198,15 +198,18 @@ ; CHECK: # %bb.0: ; CHECK: movq $4, %rax ; CHECK-NEXT: leal 1(%rsi), %ecx +; CHECK-NEXT: movl %ecx, %ecx ; CHECK-NEXT: imulq %rax, %rcx ; CHECK-NEXT: leaq (%rdi,%rcx), %rcx ; CHECK-NEXT: leal 2(%rsi), %edx +; CHECK-NEXT: movl %edx, %edx ; CHECK-NEXT: imulq %rax, %rdx ; CHECK-NEXT: leaq (%rdi,%rdx), %rdx ; CHECK-NEXT: movl (%rdx), %edx ; CHECK-NEXT: addl (%rcx), %edx -; CHECK-NEXT: imulq %rax, %rsi -; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: imulq %rax, %rcx +; CHECK-NEXT: leaq (%rdi,%rcx), %rax ; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq Index: test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir =================================================================== --- test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir +++ test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir @@ -136,8 +136,8 @@ ; ALL-LABEL: name: anyext_s64_from_s8 ; ALL: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rdi ; ALL: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; ALL: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_8bit - ; ALL: $rax = COPY [[SUBREG_TO_REG]] + ; ALL: [[MOVZX64rr8_alt:%[0-9]+]]:gr64 = MOVZX64rr8_alt [[COPY1]] + ; ALL: $rax = COPY [[MOVZX64rr8_alt]] ; ALL: RET 0, implicit $rax %0(s64) = COPY $rdi %1(s8) = G_TRUNC %0(s64) @@ -161,8 +161,8 @@ ; ALL-LABEL: name: anyext_s64_from_s16 ; ALL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; ALL: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; ALL: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_16bit - ; ALL: $rax = COPY [[SUBREG_TO_REG]] + ; ALL: [[MOVZX64rr16_alt:%[0-9]+]]:gr64 = MOVZX64rr16_alt [[COPY1]] + ; ALL: $rax = COPY [[MOVZX64rr16_alt]] ; ALL: RET 0, implicit $rax %0(s64) = COPY $rdi %1(s16) = G_TRUNC %0(s64) Index: test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir =================================================================== --- test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir +++ test/CodeGen/X86/GlobalISel/x86_64-select-zext.mir @@ -348,8 +348,8 @@ ; CHECK-LABEL: name: zext_i32_to_i64 ; CHECK: liveins: $edi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32bit - ; CHECK: $rax = COPY [[SUBREG_TO_REG]] + ; CHECK: [[MOVZX64rr32_:%[0-9]+]]:gr64 = MOVZX64rr32 [[COPY]] + ; CHECK: $rax = COPY [[MOVZX64rr32_]] ; CHECK: RET 0, implicit $rax %0:gpr(s32) = COPY $edi %1:gpr(s64) = G_ZEXT %0(s32) Index: test/CodeGen/X86/MergeConsecutiveStores.ll =================================================================== --- test/CodeGen/X86/MergeConsecutiveStores.ll +++ test/CodeGen/X86/MergeConsecutiveStores.ll @@ -452,30 +452,28 @@ define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { ; BWON-LABEL: MergeLoadStoreBaseIndexOffset: ; BWON: # %bb.0: -; BWON-NEXT: movl %ecx, %r8d -; BWON-NEXT: xorl %ecx, %ecx +; BWON-NEXT: xorl %r8d, %r8d ; BWON-NEXT: .p2align 4, 0x90 ; BWON-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; BWON-NEXT: movq (%rdi,%rcx,8), %rax +; BWON-NEXT: movq (%rdi,%r8,8), %rax ; BWON-NEXT: movzwl (%rdx,%rax), %eax -; BWON-NEXT: movw %ax, (%rsi,%rcx,2) -; BWON-NEXT: incq %rcx -; BWON-NEXT: cmpl %ecx, %r8d +; BWON-NEXT: movw %ax, (%rsi,%r8,2) +; BWON-NEXT: incq %r8 +; BWON-NEXT: cmpl %r8d, %ecx ; BWON-NEXT: jne .LBB9_1 ; BWON-NEXT: # %bb.2: ; BWON-NEXT: retq ; ; BWOFF-LABEL: MergeLoadStoreBaseIndexOffset: ; BWOFF: # %bb.0: -; BWOFF-NEXT: movl %ecx, %r8d -; BWOFF-NEXT: xorl %ecx, %ecx +; BWOFF-NEXT: xorl %r8d, %r8d ; BWOFF-NEXT: .p2align 4, 0x90 ; BWOFF-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; BWOFF-NEXT: movq (%rdi,%rcx,8), %rax +; BWOFF-NEXT: movq (%rdi,%r8,8), %rax ; BWOFF-NEXT: movw (%rdx,%rax), %ax -; BWOFF-NEXT: movw %ax, (%rsi,%rcx,2) -; BWOFF-NEXT: incq %rcx -; BWOFF-NEXT: cmpl %ecx, %r8d +; BWOFF-NEXT: movw %ax, (%rsi,%r8,2) +; BWOFF-NEXT: incq %r8 +; BWOFF-NEXT: cmpl %r8d, %ecx ; BWOFF-NEXT: jne .LBB9_1 ; BWOFF-NEXT: # %bb.2: ; BWOFF-NEXT: retq @@ -568,30 +566,28 @@ define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { ; BWON-LABEL: MergeLoadStoreBaseIndexOffsetSext: ; BWON: # %bb.0: -; BWON-NEXT: movl %ecx, %r8d -; BWON-NEXT: xorl %ecx, %ecx +; BWON-NEXT: xorl %r8d, %r8d ; BWON-NEXT: .p2align 4, 0x90 ; BWON-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; BWON-NEXT: movsbq (%rdi,%rcx), %rax +; BWON-NEXT: movsbq (%rdi,%r8), %rax ; BWON-NEXT: movzwl (%rdx,%rax), %eax -; BWON-NEXT: movw %ax, (%rsi,%rcx,2) -; BWON-NEXT: incq %rcx -; BWON-NEXT: cmpl %ecx, %r8d +; BWON-NEXT: movw %ax, (%rsi,%r8,2) +; BWON-NEXT: incq %r8 +; BWON-NEXT: cmpl %r8d, %ecx ; BWON-NEXT: jne .LBB11_1 ; BWON-NEXT: # %bb.2: ; BWON-NEXT: retq ; ; BWOFF-LABEL: MergeLoadStoreBaseIndexOffsetSext: ; BWOFF: # %bb.0: -; BWOFF-NEXT: movl %ecx, %r8d -; BWOFF-NEXT: xorl %ecx, %ecx +; BWOFF-NEXT: xorl %r8d, %r8d ; BWOFF-NEXT: .p2align 4, 0x90 ; BWOFF-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; BWOFF-NEXT: movsbq (%rdi,%rcx), %rax +; BWOFF-NEXT: movsbq (%rdi,%r8), %rax ; BWOFF-NEXT: movw (%rdx,%rax), %ax -; BWOFF-NEXT: movw %ax, (%rsi,%rcx,2) -; BWOFF-NEXT: incq %rcx -; BWOFF-NEXT: cmpl %ecx, %r8d +; BWOFF-NEXT: movw %ax, (%rsi,%r8,2) +; BWOFF-NEXT: incq %r8 +; BWOFF-NEXT: cmpl %r8d, %ecx ; BWOFF-NEXT: jne .LBB11_1 ; BWOFF-NEXT: # %bb.2: ; BWOFF-NEXT: retq @@ -626,38 +622,36 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { ; BWON-LABEL: loadStoreBaseIndexOffsetSextNoSex: ; BWON: # %bb.0: -; BWON-NEXT: movl %ecx, %r8d -; BWON-NEXT: xorl %ecx, %ecx +; BWON-NEXT: xorl %r10d, %r10d ; BWON-NEXT: .p2align 4, 0x90 ; BWON-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; BWON-NEXT: movsbq (%rdi,%rcx), %rax -; BWON-NEXT: movzbl (%rdx,%rax), %r9d -; BWON-NEXT: incb %al -; BWON-NEXT: movsbq %al, %rax +; BWON-NEXT: movsbq (%rdi,%r10), %r8 +; BWON-NEXT: movzbl (%rdx,%r8), %r9d +; BWON-NEXT: incb %r8b +; BWON-NEXT: movsbq %r8b, %rax ; BWON-NEXT: movzbl (%rdx,%rax), %eax -; BWON-NEXT: movb %r9b, (%rsi,%rcx,2) -; BWON-NEXT: movb %al, 1(%rsi,%rcx,2) -; BWON-NEXT: incq %rcx -; BWON-NEXT: cmpl %ecx, %r8d +; BWON-NEXT: movb %r9b, (%rsi,%r10,2) +; BWON-NEXT: movb %al, 1(%rsi,%r10,2) +; BWON-NEXT: incq %r10 +; BWON-NEXT: cmpl %r10d, %ecx ; BWON-NEXT: jne .LBB12_1 ; BWON-NEXT: # %bb.2: ; BWON-NEXT: retq ; ; BWOFF-LABEL: loadStoreBaseIndexOffsetSextNoSex: ; BWOFF: # %bb.0: -; BWOFF-NEXT: movl %ecx, %r8d -; BWOFF-NEXT: xorl %ecx, %ecx +; BWOFF-NEXT: xorl %r10d, %r10d ; BWOFF-NEXT: .p2align 4, 0x90 ; BWOFF-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; BWOFF-NEXT: movsbq (%rdi,%rcx), %rax -; BWOFF-NEXT: movb (%rdx,%rax), %r9b -; BWOFF-NEXT: incb %al -; BWOFF-NEXT: movsbq %al, %rax +; BWOFF-NEXT: movsbq (%rdi,%r10), %r8 +; BWOFF-NEXT: movb (%rdx,%r8), %r9b +; BWOFF-NEXT: incb %r8b +; BWOFF-NEXT: movsbq %r8b, %rax ; BWOFF-NEXT: movb (%rdx,%rax), %al -; BWOFF-NEXT: movb %r9b, (%rsi,%rcx,2) -; BWOFF-NEXT: movb %al, 1(%rsi,%rcx,2) -; BWOFF-NEXT: incq %rcx -; BWOFF-NEXT: cmpl %ecx, %r8d +; BWOFF-NEXT: movb %r9b, (%rsi,%r10,2) +; BWOFF-NEXT: movb %al, 1(%rsi,%r10,2) +; BWOFF-NEXT: incq %r10 +; BWOFF-NEXT: cmpl %r10d, %ecx ; BWOFF-NEXT: jne .LBB12_1 ; BWOFF-NEXT: # %bb.2: ; BWOFF-NEXT: retq Index: test/CodeGen/X86/mul-i1024.ll =================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -6677,45 +6677,46 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: subq $352, %rsp # imm = 0x160 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 48(%rdi), %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 48(%rdi), %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 40(%rdi), %rbp ; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 32(%rdi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: adcq %rdi, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r10, %rcx +; X64-NEXT: movq %r10, %r14 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %rdi, %r15 ; X64-NEXT: movq %rdi, %r8 ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: adcq %rdx, %r15 ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: movq %rcx, %r12 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbx, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -6725,12 +6726,12 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r9d, %r9d -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: xorl %r10d, %r10d +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rdi, %r15 +; X64-NEXT: addq %rdi, %r11 ; X64-NEXT: adcq %rcx, %rbp ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6741,22 +6742,23 @@ ; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: addq %rax, %r9 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r9 -; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: addq %rax, %rsi +; X64-NEXT: movq %rcx, %r10 +; X64-NEXT: adcq %rdx, %r10 +; X64-NEXT: addq %rbp, %rsi +; X64-NEXT: movq %rsi, %rbp +; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: adcq %rcx, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%r10), %rax +; X64-NEXT: movq %r8, %rax +; X64-NEXT: adcq %rcx, %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq (%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: mulq %r8 @@ -6776,25 +6778,26 @@ ; X64-NEXT: movq %rbx, %r8 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r14, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: adcq %r11, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r12, %rax -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: adcq %rbp, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbp, %rdi -; X64-NEXT: movq 8(%r10), %rax +; X64-NEXT: adcq %r10, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 8(%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %r12 ; X64-NEXT: addq %rsi, %r12 @@ -6802,113 +6805,112 @@ ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rbx, %r12 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq 16(%r10), %rax +; X64-NEXT: movq 16(%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r8, %rcx ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: movq %rsi, %r10 -; X64-NEXT: adcq %rdx, %r10 +; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %rbp, %rcx -; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: adcq %rbx, %r9 ; X64-NEXT: movq %r8, %rdx -; X64-NEXT: movq %r8, %r14 -; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill -; X64-NEXT: addq %r11, %rdx +; X64-NEXT: movq %r8, %r15 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r14, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r12, %rsi -; X64-NEXT: adcq %r12, %r15 -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rcx, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: adcq %r10, %rdi +; X64-NEXT: adcq %r12, %r11 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rcx, %rdi ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq 40(%rdi), %rax +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: adcq %r9, %r10 +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: movq 40(%r14), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: xorl %r9d, %r9d -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: addq %r10, %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: adcq %r11, %rbp +; X64-NEXT: addq %r13, %rdi +; X64-NEXT: adcq %r10, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq 48(%rdi), %rax +; X64-NEXT: movq 48(%r14), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %r12 ; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %r11, %rdi -; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: movq %r10, %rcx +; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: adcq %rbx, %rdi +; X64-NEXT: adcq %rbx, %rcx ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r13, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdi, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r12, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r10 -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r10, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %r10 +; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %cl ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r13 +; X64-NEXT: movq %rdi, %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx @@ -6920,26 +6922,26 @@ ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rcx, %r10 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r13 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi @@ -6947,47 +6949,45 @@ ; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r14, %rsi -; X64-NEXT: adcq %r8, %r13 +; X64-NEXT: addq %r8, %rsi +; X64-NEXT: adcq %r9, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r11, %rbx -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: mulq %rbp +; X64-NEXT: movq %r10, %rbx +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r11 -; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %r9 +; X64-NEXT: movq 24(%r14), %r14 ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %rbp -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %rbp ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: adcq %r10, %rbx +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rsi, %r10 +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp @@ -6997,74 +6997,74 @@ ; X64-NEXT: setb %r15b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %r11, %rsi +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r14, %rdi +; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rdi, %r14 +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rdi, %r11 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %r9, %r12 +; X64-NEXT: mulq %r14 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: addq %r11, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: adcq %r10, %r9 +; X64-NEXT: addq %r9, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r9 +; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: adcq %rbx, %r11 ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r9 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx @@ -7076,27 +7076,26 @@ ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rdi ; X64-NEXT: setb %cl -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -7104,47 +7103,46 @@ ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: addq %r14, %rbx +; X64-NEXT: addq %r13, %rbx ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %rbp, %r14 -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: adcq %rdi, %rcx +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: adcq %rdi, %r15 ; X64-NEXT: setb %dil ; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: addq %rcx, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: addq %r15, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: addq %r13, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: adcq %r14, %rbp +; X64-NEXT: addq %r14, %rdi +; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: adcq %r13, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %rbx, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi @@ -7152,52 +7150,53 @@ ; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %rsi, %rcx +; X64-NEXT: adcq %rsi, %r9 ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: addq %rcx, %rax +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq %r9, %r15 -; X64-NEXT: addq %r13, %r15 +; X64-NEXT: addq %r14, %r15 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: movq %r12, %r13 -; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: movq %r12, %rcx +; X64-NEXT: adcq %r13, %rcx ; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %r13 +; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %rdi, %r11 ; X64-NEXT: adcq %rbp, %r8 ; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: adcq $0, %r13 +; X64-NEXT: adcq $0, %rcx ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill @@ -7208,24 +7207,24 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %rbp +; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx @@ -7236,27 +7235,26 @@ ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r12, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %rbp -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx @@ -7268,12 +7266,11 @@ ; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r12, %r11 -; X64-NEXT: movq %r12, %rax +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r8 ; X64-NEXT: mulq %rcx @@ -7297,16 +7294,15 @@ ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: addq %r8, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq (%rsp), %rsi # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rbx, %r13 ; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: adcq $0, %rsi @@ -7348,7 +7344,8 @@ ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; X64-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload @@ -7359,8 +7356,9 @@ ; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: adcq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: adcq %r13, %r14 +; X64-NEXT: adcq %r15, %r13 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: adcq $0, %rbp @@ -7515,7 +7513,8 @@ ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %r14, %r11 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; X64-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload @@ -7529,7 +7528,8 @@ ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r10, %r15 ; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; X64-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r9 @@ -7540,31 +7540,31 @@ ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq 64(%r9), %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 72(%r9), %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: movq 72(%r9), %rdx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r13 -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %r13 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -7646,10 +7646,10 @@ ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: addq %r11, %rax +; X64-NEXT: addq %r12, %rax ; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: adcq %rcx, %r13 @@ -7662,40 +7662,40 @@ ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: setb %r14b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %rax, %rbx ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %bl -; X64-NEXT: movq %r10, %rax +; X64-NEXT: setb %sil +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx +; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %r11, %rsi +; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq %r12, %rcx +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %r8, %r15 ; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r13, %rbp -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r13, %rbx +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -7820,122 +7820,122 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq 88(%r9), %r8 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 88(%r9), %r10 +; X64-NEXT: movq %r9, %r11 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: setb %r10b -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: setb %r9b +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: movzbl %r9b, %eax ; X64-NEXT: adcq %rax, %rbp ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: mulq %rcx +; X64-NEXT: xorl %r15d, %r15d +; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: adcq %r8, %r10 +; X64-NEXT: movq (%rsp), %r9 # 8-byte Reload +; X64-NEXT: adcq %r9, %r10 ; X64-NEXT: addq %rbx, %rsi ; X64-NEXT: adcq %rbp, %r10 -; X64-NEXT: movq 64(%r9), %r13 +; X64-NEXT: movq 64(%r11), %r13 ; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %r8, %rbx +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%r9), %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq 72(%r11), %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rcx, %rbp +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %r11b, %eax -; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: adcq %rax, %rbp ; X64-NEXT: movq %r13, %rax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %r12, %rcx -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r8 -; X64-NEXT: addq %rbp, %rcx -; X64-NEXT: adcq %rbx, %r8 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %r14, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: adcq %rdx, %rax +; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: adcq %rbp, %rax +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, %r9 +; X64-NEXT: adcq %r14, %rax +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: mulq %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r14 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: addq %rbp, %rax ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: adcq %rdi, %rcx +; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: setb %dil ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: addq %rcx, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: addq %r14, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: adcq %r8, %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: addq %r14, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: adcq %r12, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r9, %rbx +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq (%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: movq %rbp, (%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 @@ -7943,44 +7943,45 @@ ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r14, %rsi -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: setb %bl -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %rdi ; X64-NEXT: addq %r10, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: addq %r9, %rbx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: addq %r14, %rbp ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq %r8, %rcx -; X64-NEXT: addq %rax, %rbx +; X64-NEXT: adcq %r12, %rcx +; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r15, %rbp +; X64-NEXT: addq %r15, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r11, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: adcq %rax, %rbp ; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r11, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %rbx -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload @@ -8050,18 +8051,17 @@ ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq %r14, %rax -; X64-NEXT: imulq %rdi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: imulq %r14, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r9, %r10 ; X64-NEXT: adcq %r8, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %r14, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -8097,9 +8097,9 @@ ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll +++ test/CodeGen/X86/mul-i512.ll @@ -1536,37 +1536,35 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: movq %rdx, (%rsp) # 8-byte Spill -; X64-NEXT: movq 24(%rdi), %r11 -; X64-NEXT: movq 16(%rdi), %r15 +; X64-NEXT: movq 24(%rdi), %rbp +; X64-NEXT: movq 16(%rdi), %r11 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: movq 8(%rsi), %rbp -; X64-NEXT: movq %r15, %rax +; X64-NEXT: movq 8(%rsi), %r14 +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %r9, %rsi ; X64-NEXT: adcq $0, %rbx -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rbp +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %r9 ; X64-NEXT: adcq %rbx, %rcx ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %ebx -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rbp, %r14 -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp @@ -1578,7 +1576,7 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %r10 -; X64-NEXT: movq %r15, %rax +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %r15 @@ -1603,22 +1601,24 @@ ; X64-NEXT: addq %r11, %rsi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %rbx +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rbx +; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: movzbl %r11b, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: mulq %rdx +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %rax, %r10 @@ -1630,36 +1630,36 @@ ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq 16(%rsi), %r8 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq 16(%rcx), %r8 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %rbx, %r9 +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 24(%rsi), %rdi +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rdi, %rbp +; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq 24(%rcx), %rdi ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: addq %rbx, %rax +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: addq %rbp, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rsi +; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %bpl -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rsi, %r9 +; X64-NEXT: addq %rcx, %r9 ; X64-NEXT: movzbl %bpl, %eax -; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %r8, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx @@ -1668,7 +1668,7 @@ ; X64-NEXT: addq %rax, %r11 ; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: addq %r9, %r11 -; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: adcq %rsi, %r14 ; X64-NEXT: addq %r10, %r12 ; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill Index: test/CodeGen/X86/musttail-varargs.ll =================================================================== --- test/CodeGen/X86/musttail-varargs.ll +++ test/CodeGen/X86/musttail-varargs.ll @@ -316,10 +316,10 @@ ; LINUX-X32: # %bb.0: ; LINUX-X32-NEXT: pushq %rax ; LINUX-X32-NEXT: .cfi_def_cfa_offset 16 -; LINUX-X32-NEXT: movl %edi, %r11d -; LINUX-X32-NEXT: addl $8, %esp +; LINUX-X32-NEXT: movl %edi, %edi +; LINUX-X32-NEXT: popq %r11 ; LINUX-X32-NEXT: .cfi_def_cfa_offset 8 -; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL +; LINUX-X32-NEXT: jmpq *%rdi # TAILCALL ; ; WINDOWS-LABEL: g_thunk: ; WINDOWS: # %bb.0: Index: test/CodeGen/X86/pr32284.ll =================================================================== --- test/CodeGen/X86/pr32284.ll +++ test/CodeGen/X86/pr32284.ll @@ -136,15 +136,13 @@ ; X86-O0-NEXT: setne %dl ; X86-O0-NEXT: xorb $-1, %dl ; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: movzbl %dl, %eax ; X86-O0-NEXT: movslq var_5, %rcx ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5 ; X86-O0-NEXT: cmpq %rcx, %rax ; X86-O0-NEXT: setg %dl ; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: movzbl %dl, %eax ; X86-O0-NEXT: movq %rax, var_57 ; X86-O0-NEXT: movl var_5, %esi ; X86-O0-NEXT: xorl $-1, %esi @@ -152,8 +150,7 @@ ; X86-O0-NEXT: setne %dl ; X86-O0-NEXT: xorb $-1, %dl ; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: movzbl %dl, %eax ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E ; X86-O0-NEXT: retq ; @@ -446,32 +443,27 @@ ; X86-O0: # %bb.0: # %entry ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: movl %eax, %eax ; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 ; X86-O0-NEXT: setne %dl ; X86-O0-NEXT: xorb $-1, %dl ; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %eax -; X86-O0-NEXT: movl %eax, %esi +; X86-O0-NEXT: movzbl %dl, %esi ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax ; X86-O0-NEXT: xorl var_16, %eax -; X86-O0-NEXT: movl %eax, %eax ; X86-O0-NEXT: movl %eax, %edi ; X86-O0-NEXT: andq %rdi, %rsi ; X86-O0-NEXT: orq %rsi, %rcx ; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: movl %eax, %eax ; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 ; X86-O0-NEXT: setne %dl ; X86-O0-NEXT: xorb $-1, %dl ; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %eax -; X86-O0-NEXT: movl %eax, %esi +; X86-O0-NEXT: movzbl %dl, %esi ; X86-O0-NEXT: andq $0, %rsi ; X86-O0-NEXT: orq %rsi, %rcx ; X86-O0-NEXT: movl %ecx, %eax Index: test/CodeGen/X86/pr32340.ll =================================================================== --- test/CodeGen/X86/pr32340.ll +++ test/CodeGen/X86/pr32340.ll @@ -25,28 +25,28 @@ ; X64-NEXT: addl %eax, %edx ; X64-NEXT: movslq %edx, %rdi ; X64-NEXT: movq %rdi, var_826 -; X64-NEXT: movzwl var_32, %eax -; X64-NEXT: movl %eax, %edi +; X64-NEXT: movw var_32, %r8w +; X64-NEXT: movzwl %r8w, %edi ; X64-NEXT: movzwl var_901, %eax ; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D -; X64-NEXT: movslq %eax, %r8 -; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %r9, %r8 +; X64-NEXT: movslq %eax, %r9 +; X64-NEXT: movabsq $-1142377792914660288, %r10 # imm = 0xF02575732E06E440 +; X64-NEXT: xorq %r10, %r9 +; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: xorq %r9, %r10 +; X64-NEXT: xorq $-1, %r10 +; X64-NEXT: xorq %r10, %rdi ; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: xorq %r8, %r9 -; X64-NEXT: xorq $-1, %r9 -; X64-NEXT: xorq %r9, %rdi -; X64-NEXT: movq %rdi, %r8 -; X64-NEXT: orq var_57, %r8 -; X64-NEXT: orq %r8, %rdi -; X64-NEXT: movw %di, %r10w -; X64-NEXT: movw %r10w, var_900 +; X64-NEXT: orq var_57, %r9 +; X64-NEXT: orq %r9, %rdi +; X64-NEXT: movw %di, %r8w +; X64-NEXT: movw %r8w, var_900 ; X64-NEXT: cmpq var_28, %rcx ; X64-NEXT: setne %r11b ; X64-NEXT: andb $1, %r11b ; X64-NEXT: movzbl %r11b, %eax -; X64-NEXT: movw %ax, %r10w -; X64-NEXT: movw %r10w, var_827 +; X64-NEXT: movw %ax, %r8w +; X64-NEXT: movw %r8w, var_827 ; X64-NEXT: retq entry: store i16 0, i16* @var_825, align 2 Index: test/CodeGen/X86/pr32345.ll =================================================================== --- test/CodeGen/X86/pr32345.ll +++ test/CodeGen/X86/pr32345.ll @@ -25,7 +25,6 @@ ; X640-NEXT: movslq %eax, %rdx ; X640-NEXT: movzwl var_27, %eax ; X640-NEXT: subl $16610, %eax # imm = 0x40E2 -; X640-NEXT: movl %eax, %eax ; X640-NEXT: movl %eax, %ecx ; X640-NEXT: # kill: def $cl killed $rcx ; X640-NEXT: sarq %cl, %rdx Index: test/CodeGen/X86/rdrand.ll =================================================================== --- test/CodeGen/X86/rdrand.ll +++ test/CodeGen/X86/rdrand.ll @@ -106,15 +106,14 @@ ; X64-NEXT: testl %esi, %esi ; X64-NEXT: je .LBB3_3 ; X64-NEXT: # %bb.1: # %while.body.preheader -; X64-NEXT: movl %esi, %eax -; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_2: # %while.body ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: rdrandl %edx -; X64-NEXT: movl %edx, (%rdi,%rcx,4) -; X64-NEXT: addq $1, %rcx -; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: rdrandl %ecx +; X64-NEXT: movl %ecx, (%rdi,%rax,4) +; X64-NEXT: addq $1, %rax +; X64-NEXT: cmpl %eax, %esi ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: .LBB3_3: # %while.end ; X64-NEXT: retq Index: test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -73,11 +73,11 @@ ; CHECK-LABEL: loop_shared_header ; CHECK: # %entry ; CHECK: # %shared_preheader +; CHECK: # %outer_loop_latch ; CHECK: # %shared_loop_header ; CHECK: # %inner_loop_body ; CHECK: # %outer_loop_latch ; CHECK: # %merge_predecessor_split -; CHECK: # %outer_loop_latch ; CHECK: # %cleanup define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 { entry: Index: test/CodeGen/X86/urem-seteq-optsize.ll =================================================================== --- test/CodeGen/X86/urem-seteq-optsize.ll +++ test/CodeGen/X86/urem-seteq-optsize.ll @@ -67,8 +67,8 @@ ; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD ; X64-NEXT: imulq %rax, %rcx ; X64-NEXT: shrq $34, %rcx -; X64-NEXT: leal (%rcx,%rcx,4), %eax -; X64-NEXT: cmpl %eax, %edi +; X64-NEXT: leal (%rcx,%rcx,4), %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: movl $42, %ecx ; X64-NEXT: movl $-10, %eax ; X64-NEXT: cmovel %ecx, %eax Index: test/CodeGen/X86/urem-seteq.ll =================================================================== --- test/CodeGen/X86/urem-seteq.ll +++ test/CodeGen/X86/urem-seteq.ll @@ -22,13 +22,13 @@ ; ; X64-LABEL: test_urem_odd: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: shrq $34, %rcx -; X64-NEXT: leal (%rcx,%rcx,4), %ecx +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl $3435973837, %eax # imm = 0xCCCCCCCD +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: shrq $34, %rax +; X64-NEXT: leal (%rax,%rax,4), %edx ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: sete %al ; X64-NEXT: retq %urem = urem i32 %X, 5 @@ -54,13 +54,13 @@ ; ; X64-LABEL: test_urem_odd_bit30: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl $4294967285, %ecx # imm = 0xFFFFFFF5 -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: shrq $62, %rcx -; X64-NEXT: imull $1073741827, %ecx, %ecx # imm = 0x40000003 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl $4294967285, %eax # imm = 0xFFFFFFF5 +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: shrq $62, %rax +; X64-NEXT: imull $1073741827, %eax, %edx # imm = 0x40000003 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: sete %al ; X64-NEXT: retq %urem = urem i32 %X, 1073741827 @@ -86,14 +86,14 @@ ; ; X64-LABEL: test_urem_odd_bit31: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: shlq $30, %rcx -; X64-NEXT: subq %rax, %rcx -; X64-NEXT: shrq $61, %rcx -; X64-NEXT: imull $-2147483645, %ecx, %ecx # imm = 0x80000003 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: shlq $30, %rax +; X64-NEXT: subq %rcx, %rax +; X64-NEXT: shrq $61, %rax +; X64-NEXT: imull $-2147483645, %eax, %edx # imm = 0x80000003 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: sete %al ; X64-NEXT: retq %urem = urem i32 %X, 2147483651 @@ -165,13 +165,13 @@ ; ; X64-LABEL: test_urem_even_bit30: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl $4294966881, %ecx # imm = 0xFFFFFE61 -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: shrq $62, %rcx -; X64-NEXT: imull $1073741928, %ecx, %ecx # imm = 0x40000068 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl $4294966881, %eax # imm = 0xFFFFFE61 +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: shrq $62, %rax +; X64-NEXT: imull $1073741928, %eax, %edx # imm = 0x40000068 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: sete %al ; X64-NEXT: retq %urem = urem i32 %X, 1073741928 @@ -197,12 +197,12 @@ ; ; X64-LABEL: test_urem_even_bit31: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: imulq $2147483547, %rax, %rax # imm = 0x7FFFFF9B +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: imulq $2147483547, %rcx, %rax # imm = 0x7FFFFF9B ; X64-NEXT: shrq $62, %rax -; X64-NEXT: imull $-2147483546, %eax, %ecx # imm = 0x80000066 +; X64-NEXT: imull $-2147483546, %eax, %edx # imm = 0x80000066 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmpl %edx, %ecx ; X64-NEXT: sete %al ; X64-NEXT: retq %urem = urem i32 %X, 2147483750